@Preamble{
    "\input bibnames.sty"
  # "\def \TM {${}^{\sc TM}$}"
  # "\ifx \undefined \bioname \def \bioname#1{{{\em #1\/}}} \fi"
}
@String{ack-nhfb = {Nelson H. F. Beebe,
    University of Utah,
    Department of Mathematics, 110 LCB,
    155 S 1400 E RM 233,
    Salt Lake City, UT 84112-0090, USA,
    Tel: +1 801 581 5254,
    FAX: +1 801 581 4148,
    e-mail: \path|beebe@math.utah.edu|,
    \path|beebe@acm.org|,
    \path|beebe@computer.org| (Internet),
    URL: \path|https://www.math.utah.edu/~beebe/|}}
@String{j-TKDD = {ACM Transactions on Knowledge Discovery from Data (TKDD)}}
@Article{Han:2007:I,
  author          = {Jiawei Han},
  title           = {Introduction},
  journal         = j-TKDD,
  volume          = {1},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1217299.1217300},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:36 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Leskovec:2007:GED,
  author          = {Jure Leskovec and Jon Kleinberg and Christos Faloutsos},
  title           = {Graph evolution: {Densification} and shrinking diameters},
  journal         = j-TKDD,
  volume          = {1},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1217299.1217301},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:36 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {How do real graphs evolve over time? What are normal
    growth patterns in social, technological, and
    information networks? Many studies have discovered
    patterns in {\em static graphs}, identifying properties
    in a single snapshot of a large network or in a very
    small number of snapshots; these include heavy tails
    for in- and out-degree distributions, communities,
    small-world phenomena, and others. However, given the
    lack of information about network evolution over long
    periods, it has been hard to convert these findings
    into statements about trends over time.\par
    Here we study a wide range of real graphs, and we
    observe some surprising phenomena. First, most of these
    graphs densify over time with the number of edges
    growing superlinearly in the number of nodes. Second,
    the average distance between nodes often shrinks over
    time in contrast to the conventional wisdom that such
    distance parameters should increase slowly as a
    function of the number of nodes (like $ O(\log n) $ or
    $ O(\log (\log n))$).\par
    Existing graph generation models do not exhibit these
    types of behavior even at a qualitative level. We
    provide a new graph generator, based on a forest fire
    spreading process that has a simple, intuitive
    justification, requires very few parameters (like the
    flammability of nodes), and produces graphs exhibiting
    the full range of properties observed both in prior
    work and in the present study.\par
    We also notice that the forest fire model exhibits a
    sharp transition between sparse graphs and graphs that
    are densifying. Graphs with decreasing distance between
    the nodes are generated around this transition
    point.\par
    Last, we analyze the connection between the temporal
    evolution of the degree distribution and densification
    of a graph. We find that the two are fundamentally
    related. We also observe that real networks exhibit
    this type of relation between densification and the
    degree distribution.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Densification power laws; graph generators; graph
    mining; heavy-tailed distributions; small-world
    phenomena},
}
@Article{Machanavajjhala:2007:DPB,
  author          = {Ashwin Machanavajjhala and Daniel Kifer and Johannes
    Gehrke and Muthuramakrishnan Venkitasubramaniam},
  title           = {{$L$}-diversity: {Privacy} beyond {$k$}-anonymity},
  journal         = j-TKDD,
  volume          = {1},
  number          = {1},
  pages           = {3:1--3:??},
  month           = mar,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1217299.1217302},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:36 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Publishing data about individuals without revealing
    sensitive information about them is an important
    problem. In recent years, a new definition of privacy
    called $k$-anonymity has gained popularity. In a
    $k$-anonymized dataset, each record is
    indistinguishable from at least $ k - 1$ other records
    with respect to certain identifying attributes.\par
    In this article, we show using two simple attacks that
    a $k$-anonymized dataset has some subtle but severe
    privacy problems. First, an attacker can discover the
    values of sensitive attributes when there is little
    diversity in those sensitive attributes. This is a
    known problem. Second, attackers often have background
    knowledge, and we show that $k$-anonymity does not
    guarantee privacy against attackers using background
    knowledge. We give a detailed analysis of these two
    attacks, and we propose a novel and powerful privacy
    criterion called $ \ell $-diversity that can defend
    against such attacks. In addition to building a formal
    foundation for $ \ell $-diversity, we show in an
    experimental evaluation that $ \ell $-diversity is
    practical and can be implemented efficiently.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {3},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {$ \ell $-diversity; Data privacy; $k$-anonymity;
    privacy-preserving data publishing},
}
@Article{Gionis:2007:CA,
  author          = {Aristides Gionis and Heikki Mannila and Panayiotis
    Tsaparas},
  title           = {Clustering aggregation},
  journal         = j-TKDD,
  volume          = {1},
  number          = {1},
  pages           = {4:1--4:??},
  month           = mar,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1217299.1217303},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:36 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We consider the following problem: given a set of
    clusterings, find a single clustering that agrees as
    much as possible with the input clusterings. This
    problem, {\em clustering aggregation}, appears
    naturally in various contexts. For example, clustering
    categorical data is an instance of the clustering
    aggregation problem; each categorical attribute can be
    viewed as a clustering of the input rows where rows are
    grouped together if they take the same value on that
    attribute. Clustering aggregation can also be used as a
    metaclustering method to improve the robustness of
    clustering by combining the output of multiple
    algorithms. Furthermore, the problem formulation does
    not require a priori information about the number of
    clusters; it is naturally determined by the
    optimization function.\par
    In this article, we give a formal statement of the
    clustering aggregation problem, and we propose a number
    of algorithms. Our algorithms make use of the
    connection between clustering aggregation and the
    problem of {\em correlation clustering}. Although the
    problems we consider are NP-hard, for several of our
    methods, we provide theoretical guarantees on the
    quality of the solutions. Our work provides the best
    deterministic approximation algorithm for the variation
    of the correlation clustering problem we consider. We
    also show how sampling can be used to scale the
    algorithms for large datasets. We give an extensive
    empirical evaluation demonstrating the usefulness of
    the problem and of the solutions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {4},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {clustering aggregation; clustering categorical data;
    correlation clustering; Data clustering},
}
@Article{Bhattacharya:2007:CER,
  author          = {Indrajit Bhattacharya and Lise Getoor},
  title           = {Collective entity resolution in relational data},
  journal         = j-TKDD,
  volume          = {1},
  number          = {1},
  pages           = {5:1--5:??},
  month           = mar,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1217299.1217304},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:36 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Many databases contain uncertain and imprecise
    references to real-world entities. The absence of
    identifiers for the underlying entities often results
    in a database which contains multiple references to the
    same entity. This can lead not only to data redundancy,
    but also inaccuracies in query processing and knowledge
    extraction. These problems can be alleviated through
    the use of {\em entity resolution}. Entity resolution
    involves discovering the underlying entities and
    mapping each database reference to these entities.
    Traditionally, entities are resolved using pairwise
    similarity over the attributes of references. However,
    there is often additional relational information in the
    data. Specifically, references to different entities
    may cooccur. In these cases, collective entity
    resolution, in which entities for cooccurring
    references are determined jointly rather than
    independently, can improve entity resolution accuracy.
    We propose a novel relational clustering algorithm that
    uses both attribute and relational information for
    determining the underlying domain entities, and we give
    an efficient implementation. We investigate the impact
    that different relational similarity measures have on
    entity resolution quality. We evaluate our collective
    entity resolution algorithm on multiple real-world
    databases. We show that it improves entity resolution
    performance over both attribute-based baselines and
    over algorithms that consider relational information
    but do not resolve entities collectively. In addition,
    we perform detailed experiments on synthetically
    generated data to identify data characteristics that
    favor collective relational resolution over purely
    attribute-based algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {5},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {data cleaning; Entity resolution; graph clustering;
    record linkage},
}
@Article{Loh:2007:EEL,
  author          = {Wei-Yin Loh and Chien-Wei Chen and Wei Zheng},
  title           = {Extrapolation errors in linear model trees},
  journal         = j-TKDD,
  volume          = {1},
  number          = {2},
  pages           = {6:1--6:??},
  month           = aug,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1267066.1267067},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:48 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Prediction errors from a linear model tend to be
    larger when extrapolation is involved, particularly
    when the model is wrong. This article considers the
    problem of extrapolation and interpolation errors when
    a linear model tree is used for prediction. It proposes
    several ways to curtail the size of the errors, and
    uses a large collection of real datasets to demonstrate
    that the solutions are effective in reducing the
    average mean squared prediction error. The article also
    provides a proof that, if a linear model is correct,
    the proposed solutions have no undesirable effects as
    the training sample size tends to infinity.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {6},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Decision tree; prediction; regression; statistics},
}
@Article{Zhang:2007:MPP,
  author          = {Minghua Zhang and Ben Kao and David W. Cheung and
    Kevin Y. Yip},
  title           = {Mining periodic patterns with gap requirement from
    sequences},
  journal         = j-TKDD,
  volume          = {1},
  number          = {2},
  pages           = {7:1--7:??},
  month           = aug,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1267066.1267068},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:48 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We study a problem of mining frequently occurring
    periodic patterns with a gap requirement from
    sequences. Given a character sequence $S$ of length $L$
    and a pattern $P$ of length $l$, we consider $P$ a
    frequently occurring pattern in $S$ if the probability
    of {\em observing\/} $P$ given a randomly picked
    length-$l$ subsequence of $S$ exceeds a certain
    threshold. In many applications, particularly those
    related to bioinformatics, interesting patterns are
    {\em periodic\/} with a {\em gap requirement}. That is
    to say, the characters in $P$ should match subsequences
    of $S$ in such a way that the matching characters in
    $S$ are separated by gaps of more or less the same
    size. We show the complexity of the mining problem and
    discuss why traditional mining algorithms are
    computationally infeasible. We propose practical
    algorithms for solving the problem and study their
    characteristics. We also present a case study in which
    we apply our algorithms on some DNA sequences. We
    discuss some interesting patterns obtained from the
    case study.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {7},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {gap requirement; periodic pattern; Sequence mining},
}
@Article{Huang:2007:TTE,
  author          = {Jen-Wei Huang and Bi-Ru Dai and Ming-Syan Chen},
  title           = {{Twain}: {Two-end} association miner with precise
    frequent exhibition periods},
  journal         = j-TKDD,
  volume          = {1},
  number          = {2},
  pages           = {8:1--8:??},
  month           = aug,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1267066.1267069},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:48 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We investigate the general model of mining
    associations in a temporal database, where the
    exhibition periods of items are allowed to be different
    from one to another. The database is divided into
    partitions according to the time granularity imposed.
    Such temporal association rules allow us to observe
    short-term but interesting patterns that are absent
    when the whole range of the database is evaluated
    altogether. Prior work may omit some temporal
    association rules and thus have limited practicability.
    To remedy this and to give more precise frequent
    exhibition periods of frequent temporal itemsets, we
    devise an efficient algorithm {\em Twain\/} (standing
    for {\em TWo end AssocIation miNer\/}). {\em Twain\/}
    not only generates frequent patterns with more precise
    frequent exhibition periods, but also discovers more
    interesting frequent patterns. {\em Twain\/} employs
    Start time and End time of each item to provide precise
    frequent exhibition period while progressively handling
    itemsets from one partition to another. Along with one
    scan of the database, {\em Twain\/} can generate
    frequent 2-itemsets directly according to the
    cumulative filtering threshold. Then, {\em Twain\/}
    adopts the scan reduction technique to generate all
    frequent $k$-itemsets ($ k > 2 $) from the generated
    frequent 2-itemsets. Theoretical properties of {\em
    Twain\/} are derived as well in this article. The
    experimental results show that {\em Twain\/}
    outperforms the prior works in the quality of frequent
    patterns, execution time, I/O cost, CPU overhead and
    scalability.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {8},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Association; temporal},
}
@Article{Bayardop:2007:ISI,
  author          = {Roberto Bayardo and Kristin P. Bennett and Gautam Das
    and Dimitrios Gunopulos and Johannes Gehrke},
  title           = {Introduction to special issue {ACM SIGKDD 2006}},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {9:1--9:??},
  month           = dec,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1297332.1297333},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {9},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Bohm:2007:RPF,
  author          = {Christian B{\"o}hm and Christos Faloutsos and Jia-Yu
    Pan and Claudia Plant},
  title           = {{RIC}: {Parameter-free} noise-robust clustering},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {10:1--10:??},
  month           = dec,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1297332.1297334},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {How do we find a {\em natural\/} clustering of a
    real-world point set which contains an unknown number
    of clusters with different shapes, and which may be
    contaminated by noise? As most clustering algorithms
    were designed with certain assumptions (Gaussianity),
    they often require the user to give input parameters,
    and are sensitive to noise. In this article, we propose
    a robust framework for determining a natural clustering
    of a given dataset, based on the minimum description
    length (MDL) principle. The proposed framework, {\em
    robust information-theoretic clustering (RIC)}, is
    orthogonal to any known clustering algorithm: Given a
    preliminary clustering, RIC purifies these clusters
    from noise, and adjusts the clusterings such that it
    simultaneously determines the most natural amount and
    shape (subspace) of the clusters. Our RIC method can be
    combined with any clustering technique ranging from
    K-means and K-medoids to advanced methods such as
    spectral clustering. In fact, RIC is even able to
    purify and improve an initial coarse clustering, even
    if we start with very simple methods. In an extension,
    we propose a fully automatic stand-alone clustering
    method and efficiency improvements. RIC scales well
    with the dataset size. Extensive experiments on
    synthetic and real-world datasets validate the proposed
    RIC framework.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {10},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Clustering; data summarization; noise robustness;
    parameter-free data mining},
}
@Article{Mei:2007:SAF,
  author          = {Qiaozhu Mei and Dong Xin and Hong Cheng and Jiawei Han
    and Chengxiang Zhai},
  title           = {Semantic annotation of frequent patterns},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {11:1--11:??},
  month           = dec,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1297332.1297335},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Using frequent patterns to analyze data has been one
    of the fundamental approaches in many data mining
    applications. Research in frequent pattern mining has
    so far mostly focused on developing efficient
    algorithms to discover various kinds of frequent
    patterns, but little attention has been paid to the
    important next step --- interpreting the discovered
    frequent patterns. Although the compression and
    summarization of frequent patterns has been studied in
    some recent work, the proposed techniques there can
    only annotate a frequent pattern with nonsemantical
    information (e.g., support), which provides only
    limited help for a user to understand the
    patterns.\par
    In this article, we study the novel problem of
    generating semantic annotations for frequent patterns.
    The goal is to discover the hidden meanings of a
    frequent pattern by annotating it with in-depth,
    concise, and structured information. We propose a
    general approach to generate such an annotation for a
    frequent pattern by constructing its context model,
    selecting informative context indicators, and
    extracting representative transactions and semantically
    similar patterns. This general approach can well
    incorporate the user's prior knowledge, and has
    potentially many applications, such as generating a
    dictionary-like description for a pattern, finding
    synonym patterns, discovering semantic relations, and
    summarizing semantic classes of a set of frequent
    patterns. Experiments on different datasets show that
    our approach is effective in generating semantic
    pattern annotations.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {11},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Frequent pattern; pattern annotation; pattern context;
    pattern semantic analysis},
}
@Article{Koren:2007:MEP,
  author          = {Yehuda Koren and Stephen C. North and Chris Volinsky},
  title           = {Measuring and extracting proximity graphs in networks},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {12:1--12:??},
  month           = dec,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1297332.1297336},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Measuring distance or some other form of proximity
    between objects is a standard data mining tool.
    Connection subgraphs were recently proposed as a way to
    demonstrate proximity between nodes in networks. We
    propose a new way of measuring and extracting proximity
    in networks called ``cycle-free effective conductance''
    (CFEC). Importantly, the measured proximity is
    accompanied with a {\em proximity subgraph\/} which
    allows assessing and understanding measured values. Our
    proximity calculation can handle more than two
    endpoints, directed edges, is statistically well
    behaved, and produces an effectiveness score for the
    computed subgraphs. We provide an efficient algorithm
    to measure and extract proximity. Also, we report
    experimental results and show examples for four large
    network datasets: a telecommunications calling graph,
    the IMDB actors graph, an academic coauthorship
    network, and a movie recommendation system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {12},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Connection subgraph; cycle-free escape probability;
    escape probability; graph mining; proximity; proximity
    subgraph; random walk},
}
@Article{Ihler:2007:LDE,
  author          = {Alexander Ihler and Jon Hutchins and Padhraic Smyth},
  title           = {Learning to detect events with {Markov}-modulated
    {Poisson} processes},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {13:1--13:??},
  month           = dec,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1297332.1297337},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Time-series of count data occur in many different
    contexts, including Internet navigation logs, freeway
    traffic monitoring, and security logs associated with
    buildings. In this article we describe a framework for
    detecting anomalous events in such data using an
    unsupervised learning approach. Normal periodic
    behavior is modeled via a time-varying Poisson process
    model, which in turn is modulated by a hidden Markov
    process that accounts for bursty events. We outline a
    Bayesian framework for learning the parameters of this
    model from count time-series. Two large real-world
    datasets of time-series counts are used as testbeds to
    validate the approach, consisting of freeway traffic
    data and logs of people entering and exiting a
    building. We show that the proposed model is
    significantly more accurate at detecting known events
    than a more traditional threshold-based technique. We
    also describe how the model can be used to investigate
    different degrees of periodicity in the data, including
    systematic day-of-week and time-of-day effects, and to
    make inferences about different aspects of events such
    as number of vehicles or people involved. The results
    indicate that the Markov-modulated Poisson framework
    provides a robust and accurate framework for adaptively
    and autonomously learning how to separate unusual
    bursty events from traces of normal human activity.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {13},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Event detection; Markov modulated; Poisson},
}
@Article{Gionis:2007:ADM,
  author          = {Aristides Gionis and Heikki Mannila and Taneli
    Mielik{\"a}inen and Panayiotis Tsaparas},
  title           = {Assessing data mining results via swap randomization},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {14:1--14:??},
  month           = dec,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1297332.1297338},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The problem of assessing the significance of data
    mining results on high-dimensional 0--1 datasets has
    been studied extensively in the literature. For
    problems such as mining frequent sets and finding
    correlations, significance testing can be done by
    standard statistical tests such as chi-square, or other
    methods. However, the results of such tests depend only
    on the specific attributes and not on the dataset as a
    whole. Moreover, the tests are difficult to apply to
    sets of patterns or other complex results of data
    mining algorithms. In this article, we consider a
    simple randomization technique that deals with this
    shortcoming. The approach consists of producing random
    datasets that have the same row and column margins as
    the given dataset, computing the results of interest on
    the randomized instances and comparing them to the
    results on the actual data. This randomization
    technique can be used to assess the results of many
    different types of data mining algorithms, such as
    frequent sets, clustering, and spectral analysis. To
    generate random datasets with given margins, we use
    variations of a Markov chain approach which is based on
    a simple swap operation. We give theoretical results on
    the efficiency of different randomization methods, and
    apply the swap randomization method to several
    well-known datasets. Our results indicate that for some
    datasets the structure discovered by the data mining
    algorithms is expected, given the row and column
    margins of the datasets, while for other datasets the
    discovered structure conveys information that is not
    captured by the margin counts.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {14},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {0--1 data; randomization tests; Significance testing;
    swaps},
}
@Article{Tang:2008:TTA,
  author          = {Lei Tang and Huan Liu and Jianping Zhang and Nitin
    Agarwal and John J. Salerno},
  title           = {Topic taxonomy adaptation for group profiling},
  journal         = j-TKDD,
  volume          = {1},
  number          = {4},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1324172.1324173},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:07 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {A topic taxonomy is an effective representation that
    describes salient features of virtual groups or online
    communities. A topic taxonomy consists of topic nodes.
    Each internal node is defined by its vertical path
    (i.e., ancestor and child nodes) and its horizontal
    list of attributes (or terms). In a text-dominant
    environment, a topic taxonomy can be used to flexibly
    describe a group's interests with varying granularity.
    However, the stagnant nature of a taxonomy may fail to
    timely capture the dynamic change of a group's
    interest. This article addresses the problem of how to
    adapt a topic taxonomy to the accumulated data that
    reflects the change of a group's interest to achieve
    dynamic group profiling. We first discuss the issues
    related to topic taxonomy. We next formulate taxonomy
    adaptation as an optimization problem to find the
    taxonomy that best fits the data. We then present a
    viable algorithm that can efficiently accomplish
    taxonomy adaptation. We conduct extensive experiments
    to evaluate our approach's efficacy for group
    profiling, compare the approach with some alternatives,
    and study its performance for dynamic group profiling.
    While pointing out various applications of taxonomy
    adaption, we suggest some future work that can take
    advantage of burgeoning Web 2.0 services for online
    targeted marketing, counterterrorism in connecting
    dots, and community tracking.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {dynamic profiling; group interest; taxonomy
    adjustment; text hierarchical classification; Topic
    taxonomy},
}
@Article{Cormode:2008:FHH,
  author          = {Graham Cormode and Flip Korn and S. Muthukrishnan and
    Divesh Srivastava},
  title           = {Finding hierarchical heavy hitters in streaming data},
  journal         = j-TKDD,
  volume          = {1},
  number          = {4},
  pages           = {2:1--2:??},
  month           = jan,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1324172.1324174},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:07 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Data items that arrive online as streams typically
    have attributes which take values from one or more
    hierarchies (time and geographic location, source and
    destination IP addresses, etc.). Providing an aggregate
    view of such data is important for summarization,
    visualization, and analysis. We develop an aggregate
    view based on certain organized sets of large-valued
    regions (``heavy hitters'') corresponding to
    hierarchically discounted frequency counts. We formally
    define the notion of {\em hierarchical heavy hitters\/}
    (HHHs). We first consider computing (approximate) HHHs
    over a data stream drawn from a single hierarchical
    attribute. We formalize the problem and give
    deterministic algorithms to find them in a single pass
    over the input.\par
    In order to analyze a wider range of realistic data
    streams (e.g., from IP traffic-monitoring
    applications), we generalize this problem to multiple
    dimensions. Here, the semantics of HHHs are more
    complex, since a ``child'' node can have multiple
    ``parent'' nodes. We present online algorithms that
    find approximate HHHs in one pass, with provable
    accuracy guarantees. The product of hierarchical
    dimensions forms a mathematical lattice structure. Our
    algorithms exploit this structure, and so are able to
    track approximate HHHs using only a small, fixed number
    of statistics per stored item, regardless of the number
    of dimensions.\par
    We show experimentally, using real data, that our
    proposed algorithms yields outputs which are very
    similar (virtually identical, in many cases) to offline
    computations of the exact solutions, whereas
    straightforward heavy-hitters-based approaches give
    significantly inferior answer quality. Furthermore, the
    proposed algorithms result in an order of magnitude
    savings in data structure size while performing
    competitively.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {approximation algorithms; Data mining; network data
    analysis},
}
@Article{Somaiya:2008:LCU,
author = "Manas Somaiya and Christopher Jermaine and Sanjay
Ranka",
title = "Learning correlations using the mixture-of-subsets
model",
journal = j-TKDD,
volume = "1",
number = "4",
pages = "3:1--3:??",
month = jan,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324172.1324175",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:07 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Using a mixture of random variables to model data is a
tried-and-tested method common in data mining, machine
learning, and statistics. By using mixture modeling it
is often possible to accurately model even complex,
multimodal data via very simple components. However,
the classical mixture model assumes that a data point
is generated by a single component in the model. A lot
of datasets can be modeled closer to the underlying
reality if we drop this restriction. We propose a
probabilistic framework, the {\em mixture-of-subsets
(MOS) model}, by making two fundamental changes to the
classical mixture model. First, we allow a data point
to be generated by a set of components, rather than
just a single component. Next, we limit the number of
data attributes that each component can influence. We
also propose an EM framework to learn the MOS model
from a dataset, and experimentally evaluate it on real,
high-dimensional datasets. Our results show that the
MOS model learned from the data represents the
underlying nature of the data accurately.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "EM algorithm; high-dimensional data; Mixture
modeling",
}
@Article{Halkidi:2008:CFB,
author = "M. Halkidi and D. Gunopulos and M. Vazirgiannis and N.
Kumar and C. Domeniconi",
title = "A clustering framework based on subjective and
objective validity criteria",
journal = j-TKDD,
volume = "1",
number = "4",
pages = "4:1--4:??",
month = jan,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324172.1324176",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:07 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Clustering, as an unsupervised learning process, is a
challenging problem, especially in cases of
high-dimensional datasets. Clustering result quality
can benefit from user constraints and objective
validity assessment. In this article, we propose a
semisupervised framework for learning the weighted
Euclidean subspace, where the best clustering can be
achieved. Our approach capitalizes on: (i) user
constraints; and (ii) the quality of intermediate
clustering results in terms of their structural
properties. The proposed framework uses the clustering
algorithm and the validity measure as its parameters.
We develop and discuss algorithms for learning and
tuning the weights of contributing dimensions and
defining the ``best'' clustering obtained by satisfying
user constraints. Experimental results on benchmark
datasets demonstrate the superiority of the proposed
approach in terms of improved clustering accuracy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "cluster validity; data mining; Semisupervised
learning; similarity measure learning; space learning",
}
@Article{Zaki:2008:ISI,
author = "Mohammed J. Zaki and George Karypis and Jiong Yang and
Wei Wang",
title = "Introduction to special issue on bioinformatics",
journal = j-TKDD,
volume = "2",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1342320.1342321",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:18 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jin:2008:CMM,
author = "Ying Jin and T. M. Murali and Naren Ramakrishnan",
title = "Compositional mining of multirelational biological
datasets",
journal = j-TKDD,
volume = "2",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1342320.1342322",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:18 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "High-throughput biological screens are yielding
ever-growing streams of information about multiple
aspects of cellular activity. As more and more
categories of datasets come online, there is a
corresponding multitude of ways in which inferences can
be chained across them, motivating the need for
compositional data mining algorithms. In this article,
we argue that such compositional data mining can be
effectively realized by functionally cascading
redescription mining and biclustering algorithms as
primitives. Both these primitives mirror shifts of
vocabulary that can be composed in arbitrary ways to
create rich chains of inferences. Given a relational
database and its schema, we show how the schema can be
automatically compiled into a compositional data mining
program, and how different domains in the schema can be
related through logical sequences of biclustering and
redescription invocations. This feature allows us to
rapidly prototype new data mining applications,
yielding greater understanding of scientific datasets.
We describe two applications of compositional data
mining: (i) matching terms across categories of the
Gene Ontology and (ii) understanding the molecular
mechanisms underlying stress response in human cells.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "Biclustering; bioinformatics; compositional data
mining; inductive logic programming; redescription
mining",
}
@Article{Sahay:2008:DSB,
author = "Saurav Sahay and Sougata Mukherjea and Eugene
Agichtein and Ernest V. Garcia and Shamkant B. Navathe
and Ashwin Ram",
title = "Discovering semantic biomedical relations utilizing
the {Web}",
journal = j-TKDD,
volume = "2",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1342320.1342323",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:18 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "To realize the vision of a Semantic Web for Life
Sciences, discovering relations between resources is
essential. It is very difficult to automatically
extract relations from Web pages expressed in natural
language formats. On the other hand, because of the
explosive growth of information, it is difficult to
manually extract the relations. In this paper we
present techniques to automatically discover relations
between biomedical resources from the Web. For this
purpose we retrieve relevant information from Web
Search engines and PubMed database using various
lexico-syntactic patterns as queries over SOAP web
services. The patterns are initially handcrafted but
can be progressively learnt. The extracted relations
can be used to construct and augment ontologies and
knowledge bases. Experiments are presented for general
biomedical relation discovery and domain specific
search to show the usefulness of our technique.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "Ontology construction; relation identification",
}
@Article{Ye:2008:DSA,
author = "Jieping Ye and Jianhui Chen and Ravi Janardan and
Sudhir Kumar",
title = "Developmental stage annotation of \bioname{Drosophila}
gene expression pattern images via an entire solution
path for {LDA}",
journal = j-TKDD,
volume = "2",
number = "1",
pages = "4:1--4:??",
month = mar,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1342320.1342324",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:18 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Gene expression in a developing embryo occurs in
particular cells (spatial patterns) in a time-specific
manner (temporal patterns), which leads to the
differentiation of cell fates. Images of a
\bioname{Drosophila melanogaster} embryo at a given
developmental stage, showing a particular gene
expression pattern revealed by a gene-specific probe,
can be compared for spatial overlaps. The comparison is
fundamentally important to formulating and testing gene
interaction hypotheses. Expression pattern comparison
is most biologically meaningful when images from a
similar time point (developmental stage) are compared.
In this paper, we present LdaPath, a novel formulation
of Linear Discriminant Analysis (LDA) for automatic
developmental stage range classification. It employs
multivariate linear regression with the {$ L_1 $}-norm
penalty controlled by a regularization parameter for
feature extraction and visualization. LdaPath computes
an entire solution path for all values of
regularization parameter with essentially the same
computational cost as fitting one LDA model. Thus, it
facilitates efficient model selection. It is based on
the equivalence relationship between LDA and the least
squares method for multiclass classifications. This
equivalence relationship is established under a mild
condition, which we show empirically to hold for many
high-dimensional datasets, such as expression pattern
images. Our experiments on a collection of 2705
expression pattern images show the effectiveness of the
proposed algorithm. Results also show that the LDA
model resulting from LdaPath is sparse, and irrelevant
features may be removed. Thus, LdaPath provides a
general framework for simultaneous feature selection
and feature extraction.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "dimensionality reduction; Gene expression pattern
image; linear discriminant analysis; linear
regression",
}
@Article{Lu:2008:ADA,
author = "Yijuan Lu and Qi Tian and Jennifer Neary and Feng Liu
and Yufeng Wang",
title = "Adaptive discriminant analysis for microarray-based
classification",
journal = j-TKDD,
volume = "2",
number = "1",
pages = "5:1--5:??",
month = mar,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1342320.1342325",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:18 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Microarray technology has generated enormous amounts
of high-dimensional gene expression data, providing a
unique platform for exploring gene regulatory networks.
However, the curse of dimensionality plagues efforts to
analyze these high-throughput data. Linear Discriminant
Analysis (LDA) and Biased Discriminant Analysis (BDA)
are two popular techniques for dimension reduction,
which pay attention to different roles of the positive
and negative samples in finding discriminating
subspace. However, the drawbacks of these two methods
are obvious: LDA has limited efficiency in classifying
sample data from subclasses with different
distributions, and BDA does not account for the
underlying distribution of negative samples.\par
In this paper, we propose a novel dimension reduction
technique for microarray analysis: Adaptive
Discriminant Analysis (ADA), which effectively exploits
favorable attributes of both BDA and LDA and avoids
their unfavorable ones. ADA can find a good
discriminative subspace with adaptation to different
sample distributions. It not only alleviates the
problem of high dimensionality, but also enhances the
classification performance in the subspace with
na{\"\i}ve Bayes classifier. To learn the best model
fitting the real scenario, boosted Adaptive
Discriminant Analysis is further proposed. Extensive
experiments on the yeast cell cycle regulation data
set, and the expression data of the red blood cell
cycle in malaria parasite {\em Plasmodium falciparum\/}
demonstrate the superior performance of ADA and boosted
ADA. We also present some putative genes of specific
functional classes predicted by boosted ADA. Their
potential functionality is confirmed by independent
predictions based on Gene Ontology, demonstrating that
ADA and boosted ADA are effective dimension reduction
methods for microarray-based classification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "ADA; BDA; boosted ADA; dimension reduction; LDA;
microarray",
}
@Article{Hashimoto:2008:NEP,
author = "Kosuke Hashimoto and Kiyoko Flora Aoki-Kinoshita and
Nobuhisa Ueda and Minoru Kanehisa and Hiroshi
Mamitsuka",
title = "A new efficient probabilistic model for mining labeled
ordered trees applied to glycobiology",
journal = j-TKDD,
volume = "2",
number = "1",
pages = "6:1--6:??",
month = mar,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1342320.1342326",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:18 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Mining frequent patterns from large datasets is an
important issue in data mining. Recently, complex and
unstructured (or semi-structured) datasets have
appeared as targets for major data mining applications,
including text mining, web mining and bioinformatics.
Our work focuses on labeled ordered trees, which are
typically semi-structured datasets. In bioinformatics,
carbohydrate sugar chains, or glycans, can be modeled
as labeled ordered trees. Glycans are the third major
class of biomolecules, having important roles in
signaling and recognition. For mining labeled ordered
trees, we propose a new probabilistic model and its
efficient learning scheme which significantly improves
the time and space complexity of an existing
probabilistic model for labeled ordered trees. We
evaluated the performance of the proposed model,
comparing it with those of other probabilistic models,
using synthetic as well as real datasets from
glycobiology. Experimental results showed that the
proposed model drastically reduced the computation time
of the competing model, keeping the predictive power
and avoiding overfitting to the training data. Finally,
we assessed our results on real data from a variety of
biological viewpoints, verifying known facts in
glycobiology.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "Expectation-maximization; labeled ordered trees;
maximum likelihood; probabilistic models",
}
@Article{Ge:2008:JCA,
author = "Rong Ge and Martin Ester and Byron J. Gao and Zengjian
Hu and Binay Bhattacharya and Boaz Ben-Moshe",
title = "Joint cluster analysis of attribute data and
relationship data: {The} connected $k$-center problem,
algorithms and applications",
journal = j-TKDD,
volume = "2",
number = "2",
pages = "7:1--7:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376815.1376816",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:30 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Attribute data and relationship data are two principal
types of data, representing the intrinsic and extrinsic
properties of entities. While attribute data have been
the main source of data for cluster analysis,
relationship data such as social networks or metabolic
networks are becoming increasingly available. It is
also common to observe both data types carry
complementary information such as in market
segmentation and community identification, which calls
for a joint cluster analysis of both data types so as
to achieve better results. In this article, we
introduce the novel Connected $k$-Center ({\em CkC\/})
problem, a clustering model taking into account
attribute data as well as relationship data. We analyze
the complexity of the problem and prove its
NP-hardness. Therefore, we analyze the approximability
of the problem and also present a constant factor
approximation algorithm. For the special case of the
{\em CkC\/} problem where the relationship data form a
tree structure, we propose a dynamic programming method
giving an optimal solution in polynomial time. We
further present NetScan, a heuristic algorithm that is
efficient and effective for large real databases. Our
extensive experimental evaluation on real datasets
demonstrates the meaningfulness and accuracy of the
NetScan results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "approximation algorithms; Attribute data; community
identification; document clustering; joint cluster
analysis; market segmentation; NP-hardness;
relationship data",
}
@Article{Gupta:2008:BBC,
author = "Gunjan Gupta and Joydeep Ghosh",
title = "{Bregman} bubble clustering: a robust framework for
mining dense clusters",
journal = j-TKDD,
volume = "2",
number = "2",
pages = "8:1--8:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376815.1376817",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:30 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In classical clustering, each data point is assigned
to at least one cluster. However, in many applications
only a small subset of the available data is relevant
for the problem and the rest needs to be ignored in
order to obtain good clusters. Certain nonparametric
density-based clustering methods find the most relevant
data as multiple dense regions, but such methods are
generally limited to low-dimensional data and do not
scale well to large, high-dimensional datasets. Also,
they use a specific notion of ``distance'', typically
Euclidean or Mahalanobis distance, which further limits
their applicability. On the other hand, the recent One
Class Information Bottleneck (OC-IB) method is fast and
works on a large class of distortion measures known as
Bregman Divergences, but can only find a {\em single\/}
dense region. This article presents a broad framework
for finding $k$ dense clusters while ignoring the rest
of the data. It includes a seeding algorithm that can
automatically determine a suitable value for $k$.
When $k$ is forced to 1, our method gives rise to an
improved version of OC-IB with optimality guarantees.
We provide a generative model that yields the proposed
iterative algorithm for finding $k$ dense regions as a
special case. Our analysis reveals an interesting and
novel connection between the problem of finding dense
regions and exponential mixture models; a hard model
corresponding to $k$ exponential mixtures with a
uniform background results in a set of $k$ dense
clusters. The proposed method describes a highly
scalable algorithm for finding multiple dense regions
that works with any Bregman Divergence, thus extending
density-based clustering to a variety of non-Euclidean
problems not addressable by earlier methods. We present
empirical results on three artificial, two microarray
and one text dataset to show the relevance and
effectiveness of our methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "Bregman divergences; Density-based clustering;
expectation maximization; exponential family; One Class
classification",
}
@Article{Tan:2008:TMG,
author = "Henry Tan and Fedja Hadzic and Tharam S. Dillon and
Elizabeth Chang and Ling Feng",
title = "Tree model guided candidate generation for mining
frequent subtrees from {XML} documents",
journal = j-TKDD,
volume = "2",
number = "2",
pages = "9:1--9:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376815.1376818",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:30 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Due to the inherent flexibilities in both structure
and semantics, XML association rules mining faces a few
challenges, such as a more complicated hierarchical
data structure and ordered data context. Mining
frequent patterns from XML documents can be recast as
mining frequent tree structures from a database of XML
documents. In this study, we model a database of XML
documents as a database of rooted labeled ordered
subtrees. In particular, we are mainly concerned with
mining frequent induced and embedded ordered subtrees.
Our main contributions are as follows. We describe our
unique {\em embedding list\/} representation of the
tree structure, which enables efficient implementation
of our {\em Tree Model Guided\/} ({\em TMG\/})
candidate generation. {\em TMG\/} is an optimal,
nonredundant enumeration strategy that enumerates all
the valid candidates that conform to the structural
aspects of the data. We show through a mathematical
model and experiments that {\em TMG\/} has better
complexity compared to the commonly used join approach.
In this article, we propose two algorithms, MB3-Miner
and iMB3-Miner. MB3-Miner mines embedded subtrees.
iMB3-Miner mines induced and/or embedded subtrees by
using the {\em maximum level of embedding constraint}.
Our experiments with both synthetic and real datasets
against two well-known algorithms for mining induced
and embedded subtrees, demonstrate the effectiveness
and the efficiency of the proposed techniques.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "FREQT; TMG; Tree mining; tree model guided;
TreeMiner",
}
@Article{Islam:2008:STS,
author = "Aminul Islam and Diana Inkpen",
title = "Semantic text similarity using corpus-based word
similarity and string similarity",
journal = j-TKDD,
volume = "2",
number = "2",
pages = "10:1--10:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376815.1376819",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:30 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We present a method for measuring the semantic
similarity of texts using a corpus-based measure of
semantic word similarity and a normalized and modified
version of the Longest Common Subsequence (LCS) string
matching algorithm. Existing methods for computing text
similarity have focused mainly on either large
documents or individual words. We focus on computing
the similarity between two sentences or two short
paragraphs. The proposed method can be exploited in a
variety of applications involving textual knowledge
representation and knowledge discovery. Evaluation
results on two different data sets show that our method
outperforms several competing methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "corpus-based measures; Semantic similarity of words;
similarity of short texts",
}
@Article{Sun:2008:ITA,
author = "Jimeng Sun and Dacheng Tao and Spiros Papadimitriou
and Philip S. Yu and Christos Faloutsos",
title = "Incremental tensor analysis: {Theory} and
applications",
journal = j-TKDD,
volume = "2",
number = "3",
pages = "11:1--11:??",
month = oct,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1409620.1409621",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:41 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "How do we find patterns in author-keyword
associations, evolving over time? Or in data cubes
(tensors), with product-branch-customer sales
information? And more generally, how to summarize
high-order data cubes (tensors)? How to incrementally
update these patterns over time? Matrix decompositions,
like principal component analysis (PCA) and variants,
are invaluable tools for mining, dimensionality
reduction, feature selection, rule identification in
numerous settings like streaming data, text, graphs,
social networks, and many more settings. However, they
have only two orders (i.e., matrices, like author and
keyword in the previous example).\par
We propose to envision such higher-order data as
tensors, and tap the vast literature on the topic.
However, these methods do not necessarily scale up, let
alone operate on semi-infinite streams. Thus, we
introduce a general framework, incremental tensor
analysis (ITA), which efficiently computes a compact
summary for high-order and high-dimensional data, and
also reveals the hidden correlations. Three variants of
ITA are presented: (1) dynamic tensor analysis (DTA);
(2) streaming tensor analysis (STA); and (3)
window-based tensor analysis (WTA). In particular, we
explore several fundamental design trade-offs such as
space efficiency, computational cost, approximation
accuracy, time dependency, and model complexity.\par
We implement all our methods and apply them in several
real settings, such as network anomaly detection,
multiway latent semantic indexing on citation networks,
and correlation study on sensor measurements. Our
empirical studies show that the proposed methods are
fast and accurate and that they find interesting
patterns and outliers on the real datasets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "multilinear algebra; stream mining; Tensor",
}
@Article{Mangasarian:2008:PPC,
author = "Olvi L. Mangasarian and Edward W. Wild and Glenn M.
Fung",
title = "Privacy-preserving classification of vertically
partitioned data via random kernels",
journal = j-TKDD,
volume = "2",
number = "3",
pages = "12:1--12:??",
month = oct,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1409620.1409622",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:41 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We propose a novel privacy-preserving support vector
machine (SVM) classifier for a data matrix $A$ whose
input feature columns are divided into groups belonging
to different entities. Each entity is unwilling to
share its group of columns or make it public. Our
classifier is based on the concept of a reduced kernel
$ k(A, B') $, where $ B' $ is the transpose
of a random matrix $B$. The column blocks of $B$
corresponding to the different entities are privately
generated by each entity and never made public. The
proposed linear or nonlinear SVM classifier, which is
public but does not reveal any of the privately held
data, has accuracy comparable to that of an ordinary
SVM classifier that uses the entire set of input
features directly.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "Privacy preserving classification; support vector
machines; vertically partitioned data",
}
@Article{Lakshmanan:2008:DRA,
author = "Laks V. S. Lakshmanan and Raymond T. Ng and Ganesh
Ramesh",
title = "On disclosure risk analysis of anonymized itemsets in
the presence of prior knowledge",
journal = j-TKDD,
volume = "2",
number = "3",
pages = "13:1--13:??",
month = oct,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1409620.1409623",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:41 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Decision makers of companies often face the dilemma of
whether to release data for knowledge discovery,
vis-a-vis the risk of disclosing proprietary or
sensitive information. Among the various methods
employed for ``sanitizing'' the data prior to
disclosure, we focus in this article on anonymization,
given its widespread use in practice. We do due
diligence to the question ``just how safe is the
anonymized data?'' We consider both those scenarios
when the hacker has no information and, more
realistically, when the hacker may have partial
information about items in the domain. We conduct our
analyses in the context of frequent set mining and
address the safety question at two different levels:
(i) how likely of being cracked (i.e., re-identified by
a hacker), are the identities of individual items and
(ii) how likely are sets of items cracked? For
capturing the prior knowledge of the hacker, we propose
a {\em belief function}, which amounts to an educated
guess of the frequency of each item. For various
classes of belief functions which correspond to
different degrees of prior knowledge, we derive
formulas for computing the expected number of cracks of
single items and for itemsets, the probability of
cracking the itemsets. While obtaining exact values
for more general situations is computationally hard, we
propose a series of heuristics called the {\em
O-estimates}. They are easy to compute and are shown
fairly accurate, justified by empirical results on real
benchmark datasets. Based on the O-estimates, we
propose a recipe for the decision makers to resolve
their dilemma. Our recipe operates at two different
levels, depending on whether the data owner wants to
reason in terms of single items or sets of items (or
both). Finally, we present techniques for ascertaining
a hacker's knowledge of correlation in terms of
co-occurrence of items likely. This information
regarding the hacker's knowledge can be incorporated
into our framework of disclosure risk analysis and we
present experimental results demonstrating how this
knowledge affects the heuristic estimates we have
developed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "anonymization; belief function; bipartite graphs;
correlation; Disclosure risk; frequent itemsets;
hacker; matching; prior knowledge; sampling",
}
@Article{Vaidya:2008:PPD,
  author          = {Jaideep Vaidya and Chris Clifton and Murat
                     Kantarcioglu and A. Scott Patterson},
  title           = {Privacy-preserving decision trees over vertically
                     partitioned data},
  journal         = j-TKDD,
  volume          = {2},
  number          = {3},
  pages           = {14:1--14:??},
  month           = oct,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1409620.1409624},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:41 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Privacy and security concerns can prevent sharing of
                     data, derailing data-mining projects. Distributed
                     knowledge discovery, if done correctly, can alleviate
                     this problem. We introduce a generalized
                     privacy-preserving variant of the ID3 algorithm for
                     vertically partitioned data distributed over two or
                     more parties. Along with a proof of security, we
                     discuss what would be necessary to make the protocols
                     completely secure. We also provide experimental
                     results, giving a first demonstration of the practical
                     complexity of secure multiparty computation-based data
                     mining.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {14},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Decision tree classification; privacy},
}
@Article{Chuang:2009:FPS,
  author          = {Kun-Ta Chuang and Hung-Leng Chen and Ming-Syan Chen},
  title           = {Feature-preserved sampling over streaming data},
  journal         = j-TKDD,
  volume          = {2},
  number          = {4},
  pages           = {15:1--15:??},
  month           = jan,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1460797.1460798},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:51 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In this article, we explore a novel sampling model,
                     called {\em feature preserved sampling\/} ({\em FPS\/})
                     that sequentially generates a high-quality sample over
                     sliding windows. The sampling quality we consider
                     refers to the degree of consistency between the sample
                     proportion and the population proportion of each
                     attribute value in a window. Due to the time-variant
                     nature of real-world datasets, users are more likely to
                     be interested in the most recent data. However,
                     previous works have not been able to generate a
                     high-quality sample over sliding windows that precisely
                     preserves up-to-date population characteristics.
                     Motivated by this shortcoming, we have developed the
                     {\em FPS\/} algorithm, which has several advantages:
                     (1) it sequentially generates a sample from a
                     time-variant data source over sliding windows; (2) the
                     execution time of {\em FPS\/} is linear with respect to
                     the database size; (3) the {\em relative\/}
                     proportional differences between the sample proportions
                     and population proportions of most distinct attribute
                     values are guaranteed to be below a specified error
                     threshold, $ \epsilon $, while the {\em relative\/}
                     proportion differences of the remaining attribute
                     values are as close to $ \epsilon $ as possible, which
                     ensures that the generated sample is of high quality;
                     (4) the sample rate is close to the user specified rate
                     so that a high quality sampling result can be obtained
                     without increasing the sample size; (5) by a thorough
                     analytical and empirical study, we prove that {\em
                     FPS\/} has acceptable space overheads, especially when
                     the attribute values have Zipfian distributions, and
                     {\em FPS\/} can also excellently preserve the
                     population proportion of multivariate features in the
                     sample; and (6) {\em FPS\/} can be applied to infinite
                     streams and finite datasets equally, and the generated
                     samples can be used for various applications. Our
                     experiments on both real and synthetic data validate
                     that {\em FPS\/} can effectively obtain a high quality
                     sample of the desired size. In addition, while using
                     the sample generated by {\em FPS\/} in various mining
                     applications, a significant improvement in efficiency
                     can be achieved without compromising the model's
                     precision.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {15},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {sampling; Streaming mining},
}
@Article{Jiang:2009:MFC,
author = "Daxin Jiang and Jian Pei",
title = "Mining frequent cross-graph quasi-cliques",
journal = j-TKDD,
volume = "2",
number = "4",
pages = "16:1--16:??",
month = jan,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1460797.1460799",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 17:59:51 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Joint mining of multiple datasets can often discover
interesting, novel, and reliable patterns which cannot
be obtained solely from any single source. For example,
in bioinformatics, jointly mining multiple gene
expression datasets obtained by different labs or
during various biological processes may overcome the
heavy noise in the data. Moreover, by joint mining of
gene expression data and protein-protein interaction
data, we may discover clusters of genes which show
coherent expression patterns and also produce
interacting proteins. Such clusters may be potential
pathways.\par
In this article, we investigate a novel data mining
problem, {\em mining frequent cross-graph
quasi-cliques}, which is generalized from several
interesting applications in bioinformatics,
cross-market customer segmentation, social network
analysis, and Web mining. In a graph, a set of vertices
$S$ is a $ \gamma $-quasi-clique $ (0 < \gamma \leq 1)$
if each vertex $v$ in $S$ directly connects to at least
$ \gamma \cdot (|S| - 1)$ other vertices in $S$. Given
a set of graphs $ G_1, \ldots {}, G_n$ and parameter $
{\rm min \_ sup} (0 < {\rm min \_ sup} \leq 1)$, a set of
vertices $S$ is a frequent cross-graph quasi-clique if
$S$ is a $ \gamma $-quasi-clique in at least $ {\rm min
\_ sup} \cdot n$ graphs, and there does not exist a
proper superset of $S$ having the property.\par
We build a general model, show why the complete set of
frequent cross-graph quasi-cliques cannot be found by
previous data mining methods, and study the complexity
of the problem. While the problem is difficult, we
develop practical algorithms which exploit several
interesting and effective techniques and heuristics to
efficaciously mine frequent cross-graph quasi-cliques.
A systematic performance study is reported on both
synthetic and real data sets. We demonstrate some
interesting and meaningful frequent cross-graph
quasi-cliques in bioinformatics. The experimental
results also show that our algorithms are efficient and
scalable.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "bioinformatics; clique; Graph mining; joint mining",
}
@Article{Domeniconi:2009:WCE,
  author          = {Carlotta Domeniconi and Muna Al-Razgan},
  title           = {Weighted cluster ensembles: {Methods} and analysis},
  journal         = j-TKDD,
  volume          = {2},
  number          = {4},
  pages           = {17:1--17:??},
  month           = jan,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1460797.1460800},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:51 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Cluster ensembles offer a solution to challenges
                     inherent to clustering arising from its ill-posed
                     nature. Cluster ensembles can provide robust and stable
                     solutions by leveraging the consensus across multiple
                     clustering results, while averaging out emergent
                     spurious structures that arise due to the various
                     biases to which each participating algorithm is tuned.
                     In this article, we address the problem of combining
                     multiple {\em weighted clusters\/} that belong to
                     different subspaces of the input space. We leverage the
                     diversity of the input clusterings in order to generate
                     a consensus partition that is superior to the
                     participating ones. Since we are dealing with weighted
                     clusters, our consensus functions make use of the
                     weight vectors associated with the clusters. We
                     demonstrate the effectiveness of our techniques by
                     running experiments with several real datasets,
                     including high-dimensional text data. Furthermore, we
                     investigate in depth the issue of diversity and
                     accuracy for our ensemble methods. Our analysis and
                     experimental results show that the proposed techniques
                     are capable of producing a partition that is as good as
                     or better than the best individual clustering.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {17},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {accuracy and diversity measures; Cluster ensembles;
                     consensus functions; data mining; subspace clustering;
                     text data},
}
@Article{Zhang:2009:DGA,
  author          = {Zhenjie Zhang and Laks V. S. Lakshmanan and Anthony K.
                     H. Tung},
  title           = {On domination game analysis for microeconomic data
                     mining},
  journal         = j-TKDD,
  volume          = {2},
  number          = {4},
  pages           = {18:1--18:??},
  month           = jan,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1460797.1460801},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:51 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Game theory is a powerful tool for analyzing the
                     competitions among manufacturers in a market. In this
                     article, we present a study on combining game theory
                     and data mining by introducing the concept of
                     domination game analysis. We present a multidimensional
                     market model, where every dimension represents one
                     attribute of a commodity. Every product or customer is
                     represented by a point in the multidimensional space,
                     and a product is said to ``dominate'' a customer if all
                     of its attributes can satisfy the requirements of the
                     customer. The expected market share of a product is
                     measured by the expected number of the buyers in the
                     customers, all of which are equally likely to buy any
                     product dominating him. A Nash equilibrium is a
                     configuration of the products achieving stable expected
                     market shares for all products. We prove that Nash
                     equilibrium in such a model can be computed in
                     polynomial time if every manufacturer tries to modify
                     its product in a round robin manner. To further improve
                     the efficiency of the computation, we also design two
                     algorithms for the manufacturers to efficiently find
                     their best response to other products in the market.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {18},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {data mining; Domination game; game theory},
}
@Article{Kriegel:2009:CHD,
  author          = {Hans-Peter Kriegel and Peer Kr{\"o}ger and Arthur
                     Zimek},
  title           = {Clustering high-dimensional data: a survey on subspace
                     clustering, pattern-based clustering, and correlation
                     clustering},
  journal         = j-TKDD,
  volume          = {3},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1497577.1497578},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {As a prolific research area in data mining, subspace
                     clustering and related problems induced a vast quantity
                     of proposed solutions. However, many publications
                     compare a new proposition --- if at all --- with one or
                     two competitors, or even with a so-called
                     ``na{\"\i}ve'' ad hoc solution, but fail to clarify the
                     exact problem definition. As a consequence, even if two
                     solutions are thoroughly compared experimentally, it
                     will often remain unclear whether both solutions tackle
                     the same problem or, if they do, whether they agree in
                     certain tacit assumptions and how such assumptions may
                     influence the outcome of an algorithm. In this survey,
                     we try to clarify: (i) the different problem
                     definitions related to subspace clustering in general;
                     (ii) the specific difficulties encountered in this
                     field of research; (iii) the varying assumptions,
                     heuristics, and intuitions forming the basis of
                     different approaches; and (iv) how several prominent
                     solutions tackle different problems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {clustering; high-dimensional data; Survey},
}
@Article{Dhurandhar:2009:SAM,
  author          = {Amit Dhurandhar and Alin Dobra},
  title           = {Semi-analytical method for analyzing models and model
                     selection measures based on moment analysis},
  journal         = j-TKDD,
  volume          = {3},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1497577.1497579},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In this article we propose a moment-based method for
                     studying models and model selection measures. By
                     focusing on the probabilistic space of classifiers
                     induced by the classification algorithm rather than on
                     that of datasets, we obtain efficient characterizations
                     for computing the moments, which is followed by
                     visualization of the resulting formulae that are too
                     complicated for direct interpretation. By assuming the
                     data to be drawn independently and identically
                     distributed from the underlying probability
                     distribution, and by going over the space of all
                     possible datasets, we establish general relationships
                     between the generalization error, hold-out-set error,
                     cross-validation error, and leave-one-out error. We
                     later exemplify the method and the results by studying
                     the behavior of the errors for the naive Bayes
                     classifier.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {classification; generalization error; Model
                     selection},
}
@Article{Cerf:2009:CPM,
author = "Lo{\"\i}c Cerf and J{\'e}r{\'e}my Besson and
C{\'e}line Robardet and Jean-Fran{\c{c}}ois Boulicaut",
title = "Closed patterns meet {$n$}-ary relations",
journal = j-TKDD,
volume = "3",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1497577.1497580",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
bibdate = "Fri Apr 24 18:00:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Set pattern discovery from binary relations has been
extensively studied during the last decade. In
particular, many complete and efficient algorithms for
frequent closed set mining are now available.
Generalizing such a task to $n$-ary relations ($ n \geq
2$) appears as a timely challenge. It may be important
for many applications, for example, when adding the
time dimension to the popular {\em objects\/} $ \times
$ {\em features\/} binary case. The generality of the
task (no assumption being made on the relation arity or
on the size of its attribute domains) makes it
computationally challenging. We introduce an algorithm
called Data-Peeler. From an $n$-ary relation, it
extracts all closed $n$-sets satisfying given piecewise
(anti) monotonic constraints. This new class of
constraints generalizes both monotonic and
antimonotonic constraints. Considering the special case
of ternary relations, Data-Peeler outperforms the
state-of-the-art algorithms CubeMiner and Trias by
orders of magnitude. These good performances must be
granted to a new clever enumeration strategy allowing
to efficiently enforce the closeness property. The
relevance of the extracted closed $n$-sets is assessed
on real-life 3- and 4-ary relations. Beyond natural 3- or
4-ary relations, expanding a relation with an
additional attribute can help in enforcing rather
abstract constraints such as the robustness with
respect to binarization. Furthermore, a collection of
closed $n$-sets is shown to be an excellent starting
point to compute a tiling of the dataset.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "$n$-ary relations; Closed patterns; constraint
properties; constraint-based mining; tiling",
}
@Article{Angiulli:2009:DEA,
  author          = {Fabrizio Angiulli and Fabio Fassetti},
  title           = {{DOLPHIN}: an efficient algorithm for mining
                     distance-based outliers in very large datasets},
  journal         = j-TKDD,
  volume          = {3},
  number          = {1},
  pages           = {4:1--4:??},
  month           = mar,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1497577.1497581},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In this work a novel distance-based outlier detection
                     algorithm, named DOLPHIN, working on disk-resident
                     datasets and whose I/O cost corresponds to the cost of
                     sequentially reading the input dataset file twice, is
                     presented.\par
                     It is both theoretically and empirically shown that the
                     main memory usage of DOLPHIN amounts to a small
                     fraction of the dataset and that DOLPHIN has linear
                     time performance with respect to the dataset size.
                     DOLPHIN gains efficiency by naturally merging together
                     in a unified schema three strategies, namely the
                     selection policy of objects to be maintained in main
                     memory, usage of pruning rules, and similarity search
                     techniques. Importantly, similarity search is
                     accomplished by the algorithm without the need of
                     preliminarily indexing the whole dataset, as other
                     methods do.\par
                     The algorithm is simple to implement and it can be used
                     with any type of data, belonging to either metric or
                     nonmetric spaces. Moreover, a modification to the basic
                     method allows DOLPHIN to deal with the scenario in
                     which the available buffer of main memory is smaller
                     than its standard requirements. DOLPHIN has been
                     compared with state-of-the-art distance-based outlier
                     detection algorithms, showing that it is much more
                     efficient.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {4},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Data mining; distance-based outliers; outlier
                     detection},
}
@Article{Chen:2009:BAS,
  author          = {Bee-Chung Chen and Raghu Ramakrishnan and Jude W.
                     Shavlik and Pradeep Tamma},
  title           = {Bellwether analysis: {Searching} for cost-effective
                     query-defined predictors in large databases},
  journal         = j-TKDD,
  volume          = {3},
  number          = {1},
  pages           = {5:1--5:??},
  month           = mar,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1497577.1497582},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {How to mine massive datasets is a challenging problem
                     with great potential value. Motivated by this
                     challenge, much effort has concentrated on developing
                     scalable versions of machine learning algorithms.
                     However, the cost of mining large datasets is not just
                     computational; preparing the datasets into the ``right
                     form'' so that learning algorithms can be applied is
                     usually costly, due to the human labor that is
                     typically required and a large number of choices in
                     data preparation, which include selecting different
                     subsets of data and aggregating data at different
                     granularities. We make the key observation that, for a
                     number of practically motivated problems, these choices
                     can be defined using database queries and analyzed in
                     an automatic and systematic manner. Specifically, we
                     propose a new class of data-mining problem, called {\em
                     bellwether analysis}, in which the goal is to find a
                     few query-defined predictors (e.g., first week sales of
                     Peoria, IL of an item) that can be used to accurately
                     predict the result of a target query (e.g., first year
                     worldwide sales of the item) from a large number of
                     queries that define candidate predictors. To make a
                     prediction for a new item, the data needed to generate
                     such predictors has to be collected (e.g., selling the
                     new item in Peoria, IL for a week and collecting the
                     sales data). A useful predictor is one that has high
                     prediction accuracy and a low data-collection cost. We
                     call such a cost-effective predictor a {\em
                     bellwether}.\par
                     This article introduces bellwether analysis, which
                     integrates database query processing and predictive
                     modeling into a single framework, and provides scalable
                     algorithms for large datasets that cannot fit in main
                     memory. Through a series of extensive experiments, we
                     show that bellwethers do exist in real-world databases,
                     and that our computation techniques achieve good
                     efficiency on large datasets.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {5},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {bellwether; Cost-effective prediction; data cube; OLAP
                     queries; predictive models; scalable algorithms},
}
@Article{Liu:2009:ISI,
  author          = {Huan Liu and John Salerno and Michael Young and Rakesh
                     Agrawal and Philip S. Yu},
  title           = {Introduction to special issue on social computing,
                     behavioral modeling, and prediction},
  journal         = j-TKDD,
  volume          = {3},
  number          = {2},
  pages           = {6:1--6:??},
  month           = apr,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1514888.1514889},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {6},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Mehler:2009:ENC,
  author          = {Andrew Mehler and Steven Skiena},
  title           = {Expanding network communities from representative
                     examples},
  journal         = j-TKDD,
  volume          = {3},
  number          = {2},
  pages           = {7:1--7:??},
  month           = apr,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1514888.1514890},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We present an approach to leverage a small subset of a
                     coherent community within a social network into a much
                     larger, more representative sample. Our problem becomes
                     identifying a small conductance subgraph containing
                     many (but not necessarily all) members of the given
                     seed set. Starting with an initial seed set
                     representing a sample of a community, we seek to
                     discover as much of the full community as
                     possible.\par
                     We present a general method for network community
                     expansion, demonstrating that our methods work well in
                     expanding communities in real world networks starting
                     from small given seed groups (20 to 400 members). Our
                     approach is marked by incremental expansion from the
                     seeds with retrospective analysis to determine the
                     ultimate boundaries of our community. We demonstrate
                     how to increase the robustness of the general approach
                     through bootstrapping multiple random partitions of the
                     input set into seed and evaluation groups.\par
                     We go beyond statistical comparisons against gold
                     standards to careful subjective evaluations of our
                     expanded communities. This process explains the causes
                     of most disagreement between our expanded communities
                     and our gold-standards --- arguing that our expansion
                     methods provide more reliable communities than can be
                     extracted from reference sources/gazetteers such as
                     Wikipedia.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {7},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {artificial intelligence; community discovery; Discrete
                     mathematics; graph theory; news analysis; social
                     networks},
}
@Article{Lin:2009:ACT,
  author          = {Yu-Ru Lin and Yun Chi and Shenghuo Zhu and Hari
                     Sundaram and Belle L. Tseng},
  title           = {Analyzing communities and their evolutions in dynamic
                     social networks},
  journal         = j-TKDD,
  volume          = {3},
  number          = {2},
  pages           = {8:1--8:??},
  month           = apr,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1514888.1514891},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We discover communities from social network data and
                     analyze the community evolution. These communities are
                     inherent characteristics of human interaction in online
                     social networks, as well as paper citation networks.
                     Also, communities may evolve over time, due to changes
                     to individuals' roles and social status in the network
                     as well as changes to individuals' research interests.
                     We present an innovative algorithm that deviates from
                     the traditional two-step approach to analyze community
                     evolutions. In the traditional approach, communities
                     are first detected for each time slice, and then
                     compared to determine correspondences. We argue that
                     this approach is inappropriate in applications with
                     noisy data. In this paper, we propose {\em FacetNet\/}
                     for analyzing communities and their evolutions through
                     a robust {\em unified\/} process. This novel framework
                     will discover communities and capture their evolution
                     with temporal smoothness given by historic community
                     structures. Our approach relies on formulating the
                     problem in terms of maximum a posteriori (MAP)
                     estimation, where the community structure is estimated
                     both by the observed networked data and by the prior
                     distribution given by historic community structures.
                     Then we develop an iterative algorithm, with proven low
                     time complexity, which is guaranteed to converge to an
                     optimal solution. We perform extensive experimental
                     studies, on both synthetic datasets and real datasets,
                     to demonstrate that our method discovers meaningful
                     communities and provides additional insights not
                     directly obtainable from traditional methods.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {8},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Community; community net; evolution; evolution net;
                     nonnegative matrix factorization; soft membership},
}
@Article{Kimura:2009:BLM,
  author          = {Masahiro Kimura and Kazumi Saito and Hiroshi Motoda},
  title           = {Blocking links to minimize contamination spread in a
                     social network},
  journal         = j-TKDD,
  volume          = {3},
  number          = {2},
  pages           = {9:1--9:??},
  month           = apr,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1514888.1514892},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We address the problem of minimizing the propagation
                     of undesirable things, such as computer viruses or
                     malicious rumors, by blocking a limited number of links
                     in a network, which is converse to the influence
                     maximization problem in which the most influential
                     nodes for information diffusion is searched in a social
                     network. This minimization problem is more fundamental
                     than the problem of preventing the spread of
                     contamination by removing nodes in a network. We
                     introduce two definitions for the contamination degree
                     of a network, accordingly define two contamination
                     minimization problems, and propose methods for
                     efficiently finding good approximate solutions to these
                     problems on the basis of a naturally greedy strategy.
                     Using large social networks, we experimentally
                     demonstrate that the proposed methods outperform
                     conventional link-removal methods. We also show that
                     unlike the case of blocking a limited number of nodes,
                     the strategy of removing nodes with high out-degrees is
                     not necessarily effective for these problems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {9},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Contamination diffusion; link analysis; social
                     networks},
}
@Article{Agichtein:2009:MIS,
  author          = {Eugene Agichtein and Yandong Liu and Jiang Bian},
  title           = {Modeling information-seeker satisfaction in community
                     question answering},
  journal         = j-TKDD,
  volume          = {3},
  number          = {2},
  pages           = {10:1--10:??},
  month           = apr,
  year            = {2009},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1514888.1514893},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Question Answering Communities such as Naver, Baidu
                     Knows, and Yahoo! Answers have emerged as popular, and
                     often effective, means of information seeking on the
                     web. By posting questions for other participants to
                     answer, information seekers can obtain specific answers
                     to their questions. Users of CQA portals have already
                     contributed millions of questions, and received
                     hundreds of millions of answers from other
                     participants. However, CQA is not always effective: in
                     some cases, a user may obtain a perfect answer within
                     minutes, and in others it may require hours --- and
                     sometimes days --- until a satisfactory answer is
                     contributed. We investigate the problem of predicting
                     information seeker satisfaction in collaborative
                     question answering communities, where we attempt to
                     predict whether a question author will be satisfied
                     with the answers submitted by the community
                     participants. We present a general prediction model,
                     and develop a variety of content, structure, and
                     community-focused features for this task. Our
                     experimental results, obtained from a large-scale
                     evaluation over thousands of real questions and user
                     ratings, demonstrate the feasibility of modeling and
                     predicting asker satisfaction. We complement our
                     results with a thorough investigation of the
                     interactions and information seeking patterns in
                     question answering communities that correlate with
                     information seeker satisfaction. We also explore {\em
                     personalized\/} models of asker satisfaction, and show
                     that when sufficient interaction history exists,
                     personalization can significantly improve prediction
                     accuracy over a ``one-size-fits-all'' model. Our models
                     and predictions could be useful for a variety of
                     applications, such as user intent inference, answer
                     ranking, interface design, and query suggestion and
                     routing.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {10},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Community question answering; information seeker
                     satisfaction},
}
@Article{Torvik:2009:AND,
  author =       "Vetle I. Torvik and Neil R. Smalheiser",
  title =        "Author name disambiguation in {MEDLINE}",
  journal =      j-TKDD,
  volume =       "3",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1552303.1552304",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:36:58 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "{\em Background\/}: We recently described
                  ``Author-ity,'' a model for estimating the probability
                  that two articles in MEDLINE, sharing the same author
                  name, were written by the same individual. Features
                  include shared title words, journal name, coauthors,
                  medical subject headings, language, affiliations, and
                  author name features (middle initial, suffix, and
                  prevalence in MEDLINE). Here we test the hypothesis
                  that the Author-ity model will suffice to disambiguate
                  author names for the vast majority of articles in
                  MEDLINE. {\em Methods\/}: Enhancements include: (a)
                  incorporating first names and their variants, email
                  addresses, and correlations between specific last names
                  and affiliation words; (b) new methods of generating
                  large unbiased training sets; (c) new methods for
                  estimating the prior probability; (d) a weighted least
                  squares algorithm for correcting transitivity
                  violations; and (e) a maximum likelihood based
                  agglomerative algorithm for computing clusters of
                  articles that represent inferred author-individuals.
                  {\em Results\/}: Pairwise comparisons were computed for
                  all author names on all 15.3 million articles in
                  MEDLINE (2006 baseline), that share last name and first
                  initial, to create Author-ity 2006, a database that has
                  each name on each article assigned to one of 6.7
                  million inferred author-individual clusters. Recall is
                  estimated at $ \approx 98.8 \% $. Lumping (putting two
                  different individuals into the same cluster) affects $
                  \approx 0.5 \% $ of clusters, whereas splitting
                  (assigning articles written by the same individual to $
                  > 1 $ cluster) affects $ \approx 2 \% $ of articles.
                  {\em Impact\/}: The Author-ity model can be applied
                  generally to other bibliographic databases. Author name
                  disambiguation allows information retrieval and data
                  integration to become {\em person-centered}, not just
                  {\em document-centered}, setting the stage for new data
                  mining and social network tools that will facilitate
                  the analysis of scholarly publishing and collaboration
                  behavior. {\em Availability\/}: The Author-ity 2006
                  database is available for nonprofit academic research,
                  and can be freely queried via
                  http://arrowsmith.psych.uic.edu.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "bibliographic databases; Name disambiguation",
}
@Article{Tu:2009:SDC,
  author =       "Li Tu and Yixin Chen",
  title =        "Stream data clustering based on grid density and
                  attraction",
  journal =      j-TKDD,
  volume =       "3",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1552303.1552305",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:36:58 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering real-time stream data is an important and
                  challenging problem. Existing algorithms such as
                  CluStream are based on the {\em k\/}-means algorithm.
                  These clustering algorithms have difficulties finding
                  clusters of arbitrary shapes and handling outliers.
                  Further, they require the knowledge of {\em k\/} and
                  user-specified time window. To address these issues,
                  this article proposes {\em D-Stream}, a framework for
                  clustering stream data using a density-based
                  approach.\par
                  Our algorithm uses an online component that maps each
                  input data record into a grid and an offline component
                  that computes the grid density and clusters the grids
                  based on the density. The algorithm adopts a density
                  decaying technique to capture the dynamic changes of a
                  data stream and a attraction-based mechanism to
                  accurately generate cluster boundaries.\par
                  Exploiting the intricate relationships among the decay
                  factor, attraction, data density, and cluster
                  structure, our algorithm can efficiently and
                  effectively generate and adjust the clusters in real
                  time. Further, a theoretically sound technique is
                  developed to detect and remove sporadic grids mapped by
                  outliers in order to dramatically improve the space and
                  time efficiency of the system. The technique makes
                  high-speed data stream clustering feasible without
                  degrading the clustering quality. The experimental
                  results show that our algorithm has superior quality
                  and efficiency, can find clusters of arbitrary shapes,
                  and can accurately recognize the evolving behaviors of
                  real-time data streams.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "clustering; data mining; density-based algorithms;
                  Stream data",
}
@Article{Zhou:2009:LST,
  author =       "Bin Zhou and Jian Pei",
  title =        "Link spam target detection using page farms",
  journal =      j-TKDD,
  volume =       "3",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1552303.1552306",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:36:58 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Currently, most popular Web search engines adopt some
                  link-based ranking methods such as PageRank. Driven by
                  the huge potential benefit of improving rankings of Web
                  pages, many tricks have been attempted to boost page
                  rankings. The most common way, which is known as link
                  spam, is to make up some artificially designed link
                  structures. Detecting link spam effectively is a big
                  challenge. In this article, we develop novel and
                  effective detection methods for link spam target pages
                  using page farms. The essential idea is intuitive:
                  whether a page is the beneficiary of link spam is
                  reflected by how it collects its PageRank score.
                  Technically, how a target page collects its PageRank
                  score is modeled by a page farm, which consists of
                  pages contributing a major portion of the PageRank
                  score of the target page. We propose two spamicity
                  measures based on page farms. They can be used as an
                  effective measure to check whether the pages are link
                  spam target pages. An empirical study using a newly
                  available real dataset strongly suggests that our
                  method is effective. It outperforms the
                  state-of-the-art methods like SpamRank and SpamMass in
                  both precision and recall.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Link Spam; Page Farm; PageRank",
}
@Article{Wan:2009:DBC,
  author =       "Li Wan and Wee Keong Ng and Xuan Hong Dang and Philip
                  S. Yu and Kuan Zhang",
  title =        "Density-based clustering of data streams at multiple
                  resolutions",
  journal =      j-TKDD,
  volume =       "3",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1552303.1552307",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:36:58 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In data stream clustering, it is desirable to have
                  algorithms that are able to detect clusters of
                  arbitrary shape, clusters that evolve over time, and
                  clusters with noise. Existing stream data clustering
                  algorithms are generally based on an online-offline
                  approach: The online component captures synopsis
                  information from the data stream (thus, overcoming
                  real-time and memory constraints) and the offline
                  component generates clusters using the stored synopsis.
                  The online-offline approach affects the overall
                  performance of stream data clustering in various ways:
                  the ease of deriving synopsis from streaming data; the
                  complexity of data structure for storing and managing
                  synopsis; and the frequency at which the offline
                  component is used to generate clusters. In this
                  article, we propose an algorithm that (1) computes and
                  updates synopsis information in constant time; (2)
                  allows users to discover clusters at multiple
                  resolutions; (3) determines the right time for users to
                  generate clusters from the synopsis information; (4)
                  generates clusters of higher purity than existing
                  algorithms; and (5) determines the right threshold
                  function for density-based clustering based on the
                  fading model of stream data. To the best of our
                  knowledge, no existing data stream algorithms has all
                  of these features. Experimental results show that our
                  algorithm is able to detect arbitrarily shaped,
                  evolving clusters with high quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Data mining algorithms; density based clustering;
                  evolving data streams",
}
@Article{Mannila:2009:ATS,
  author =       "Heikki Mannila and Dimitrios Gunopulos",
  title =        "{ACM TKDD} special issue {ACM SIGKDD 2007} and {ACM
                  SIGKDD 2008}",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "15:1--15:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631163",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Asur:2009:EBF,
  author =       "Sitaram Asur and Srinivasan Parthasarathy and Duygu
                  Ucar",
  title =        "An event-based framework for characterizing the
                  evolutionary behavior of interaction graphs",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "16:1--16:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631164",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Interaction graphs are ubiquitous in many fields such
                  as bioinformatics, sociology and physical sciences.
                  There have been many studies in the literature targeted
                  at studying and mining these graphs. However, almost
                  all of them have studied these graphs from a static
                  point of view. The study of the evolution of these
                  graphs over time can provide tremendous insight on the
                  behavior of entities, communities and the flow of
                  information among them. In this work, we present an
                  event-based characterization of critical behavioral
                  patterns for temporally varying interaction graphs. We
                  use nonoverlapping snapshots of interaction graphs and
                  develop a framework for capturing and identifying
                  interesting events from them. We use these events to
                  characterize complex behavioral patterns of individuals
                  and communities over time. We show how semantic
                  information can be incorporated to reason about
                  community-behavior events. We also demonstrate the
                  application of behavioral patterns for the purposes of
                  modeling evolution, link prediction and influence
                  maximization. Finally, we present a diffusion model for
                  evolving networks, based on our framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "diffusion of innovations; Dynamic interaction
                  networks; evolutionary analysis",
}
@Article{Chi:2009:ESC,
  author =       "Yun Chi and Xiaodan Song and Dengyong Zhou and Koji
                  Hino and Belle L. Tseng",
  title =        "On evolutionary spectral clustering",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631165",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Evolutionary clustering is an emerging research area
                  essential to important applications such as clustering
                  dynamic Web and blog contents and clustering data
                  streams. In evolutionary clustering, a good clustering
                  result should fit the current data well, while
                  simultaneously not deviate too dramatically from the
                  recent history. To fulfill this dual purpose, a measure
                  of {\em temporal smoothness\/} is integrated in the
                  overall measure of clustering quality. In this article,
                  we propose two frameworks that incorporate temporal
                  smoothness in evolutionary spectral clustering. For
                  both frameworks, we start with intuitions gained from
                  the well-known {\em k\/}-means clustering problem, and
                  then propose and solve corresponding cost functions for
                  the evolutionary spectral clustering problems. Our
                  solutions to the evolutionary spectral clustering
                  problems provide more stable and consistent clustering
                  results that are less sensitive to short-term noises
                  while at the same time are adaptive to long-term
                  cluster drifts. Furthermore, we demonstrate that our
                  methods provide the optimal solutions to the relaxed
                  versions of the corresponding evolutionary {\em k\/}-means
                  clustering problems. Performance experiments
                  over a number of real and synthetic data sets
                  illustrate our evolutionary spectral clustering methods
                  provide more robust clustering results that are not
                  sensitive to noise and can adapt to data drifts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Evolutionary spectral clustering; preserving cluster
                  membership; preserving cluster quality; temporal
                  smoothness",
}
@Article{Fujiwara:2009:FLS,
  author =       "Yasuhiro Fujiwara and Yasushi Sakurai and Masaru
                  Kitsuregawa",
  title =        "Fast likelihood search for hidden {Markov} models",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "18:1--18:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631166",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Hidden Markov models (HMMs) are receiving considerable
                  attention in various communities and many applications
                  that use HMMs have emerged such as mental task
                  classification, biological analysis, traffic
                  monitoring, and anomaly detection. This article has two
                  goals; The first goal is exact and efficient
                  identification of the model whose state sequence has
                  the highest likelihood for the given query sequence
                  (more precisely, no HMM that actually has a
                  high-probability path for the given sequence is missed
                  by the algorithm), and the second goal is exact and
                  efficient monitoring of streaming data sequences to
                  find the best model. We propose SPIRAL, a fast search
                  method for HMM datasets. SPIRAL is based on three
                  ideas; (1) it clusters states of models to compute
                  approximate likelihood, (2) it uses several
                  granularities and approximates likelihood values in
                  search processing, and (3) it focuses on just the
                  promising likelihood computations by pruning out
                  low-likelihood state sequences. Experiments verify the
                  effectiveness of SPIRAL and show that it is more than
                  490 times faster than the naive method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Hidden Markov model; likelihood; upper bound",
}
@Article{Zhang:2009:EAG,
  author =       "Xiang Zhang and Fei Zou and Wei Wang",
  title =        "Efficient algorithms for genome-wide association
                  study",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "19:1--19:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631167",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Studying the association between quantitative
                  phenotype (such as height or weight) and single
                  nucleotide polymorphisms (SNPs) is an important problem
                  in biology. To understand underlying mechanisms of
                  complex phenotypes, it is often necessary to consider
                  joint genetic effects across multiple SNPs. ANOVA
                  (analysis of variance) test is routinely used in
                  association study. Important findings from studying
                  gene-gene (SNP-pair) interactions are appearing in the
                  literature. However, the number of SNPs can be up to
                  millions. Evaluating joint effects of SNPs is a
                  challenging task even for SNP-pairs. Moreover, with
                  large number of SNPs correlated, permutation procedure
                  is preferred over simple Bonferroni correction for
                  properly controlling family-wise error rate and
                  retaining mapping power, which dramatically increases
                  the computational cost of association study.\par
                  In this article, we study the problem of finding
                  SNP-pairs that have significant associations with a
                  given quantitative phenotype. We propose an efficient
                  algorithm, FastANOVA, for performing ANOVA tests on
                  SNP-pairs in a batch mode, which also supports large
                  permutation test. We derive an upper bound of SNP-pair
                  ANOVA test, which can be expressed as the sum of two
                  terms. The first term is based on single-SNP ANOVA
                  test. The second term is based on the SNPs and
                  independent of any phenotype permutation. Furthermore,
                  SNP-pairs can be organized into groups, each of which
                  shares a common upper bound. This allows for maximum
                  reuse of intermediate computation, efficient upper
                  bound estimation, and effective SNP-pair pruning.
                  Consequently, FastANOVA only needs to perform the ANOVA
                  test on a small number of candidate SNP-pairs without
                  the risk of missing any significant ones. Extensive
                  experiments demonstrate that FastANOVA is orders of
                  magnitude faster than the brute-force implementation of
                  ANOVA tests on all SNP pairs. The principles used in
                  FastANOVA can be applied to categorical phenotypes and
                  other statistics such as Chi-square test.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "ANOVA test; Association study; permutation test",
}
@Article{Bilgic:2009:RCM,
  author =       "Mustafa Bilgic and Lise Getoor",
  title =        "Reflect and correct: a misclassification prediction
                  approach to active inference",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "20:1--20:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631168",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Information diffusion, viral marketing, graph-based
                  semi-supervised learning, and collective classification
                  all attempt to model and exploit the relationships
                  among nodes in a network to improve the performance of
                  node labeling algorithms. However, sometimes the
                  advantage of exploiting the relationships can become a
                  disadvantage. Simple models like label propagation and
                  iterative classification can aggravate a
                  misclassification by propagating mistakes in the
                  network, while more complex models that define and
                  optimize a global objective function, such as Markov
                  random fields and graph mincuts, can misclassify a set
                  of nodes jointly. This problem can be mitigated if the
                  classification system is allowed to ask for the correct
                  labels for a few of the nodes during inference.
                  However, determining the optimal set of labels to
                  acquire is intractable under relatively general
                  assumptions, which forces us to resort to approximate
                  and heuristic techniques. We describe three such
                  techniques in this article. The first one is based on
                  directly approximating the value of the objective
                  function of label acquisition and greedily acquiring
                  the label that provides the most improvement. The
                  second technique is a simple technique based on the
                  analogy we draw between viral marketing and label
                  acquisition. Finally, we propose a method, which we
                  refer to as {\em reflect and correct}, that can learn
                  and predict when the classification system is likely to
                  make mistakes and suggests acquisitions to correct
                  those mistakes. We empirically show on a variety of
                  synthetic and real-world datasets that the reflect and
                  correct method significantly outperforms the other two
                  techniques, as well as other approaches based on
                  network structural measures such as node degree and
                  network clustering.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Active inference; collective classification;
                  information diffusion; label acquisition; viral
                  marketing",
}
@Article{Kiernan:2009:CCS,
  author =       "Jerry Kiernan and Evimaria Terzi",
  title =        "Constructing comprehensive summaries of large event
                  sequences",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "21:1--21:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631169",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Event sequences capture system and user activity over
                  time. Prior research on sequence mining has mostly
                  focused on discovering local patterns appearing in a
                  sequence. While interesting, these patterns do not give
                  a comprehensive summary of the entire event sequence.
                  Moreover, the number of patterns discovered can be
                  large. In this article, we take an alternative approach
                  and build {\em short\/} summaries that describe an
                  entire sequence, and discover local dependencies
                  between event types.\par
                  We formally define the summarization problem as an
                  optimization problem that balances shortness of the
                  summary with accuracy of the data description. We show
                  that this problem can be solved optimally in polynomial
                  time by using a combination of two dynamic-programming
                  algorithms. We also explore more efficient greedy
                  alternatives and demonstrate that they work well on
                  large datasets. Experiments on both synthetic and real
                  datasets illustrate that our algorithms are efficient
                  and produce high-quality results, and reveal
                  interesting local structures in the data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Event sequences; log mining; summarization",
}
@Article{Koren:2010:FNS,
  author =       "Yehuda Koren",
  title =        "Factor in the neighbors: {Scalable} and accurate
                  collaborative filtering",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644874",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recommender systems provide users with personalized
                  suggestions for products or services. These systems
                  often rely on collaborating filtering (CF), where past
                  transactions are analyzed in order to establish
                  connections between users and products. The most common
                  approach to CF is based on neighborhood models, which
                  originate from similarities between products or users.
                  In this work we introduce a new neighborhood model with
                  an improved prediction accuracy. Unlike previous
                  approaches that are based on heuristic similarities, we
                  model neighborhood relations by minimizing a global
                  cost function. Further accuracy improvements are
                  achieved by extending the model to exploit both
                  explicit and implicit feedback by the users. Past
                  models were limited by the need to compute all pairwise
                  similarities between items or users, which grow
                  quadratically with input size. In particular, this
                  limitation vastly complicates adopting user similarity
                  models, due to the typical large number of users. Our
                  new model solves these limitations by factoring the
                  neighborhood model, thus making both item-item and
                  user-user implementations scale linearly with the size
                  of the data. The methods are tested on the Netflix
                  data, with encouraging results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "collaborative filtering; Netflix Prize; Recommender
                  systems",
}
@Article{Syed:2010:MDP,
  author =       "Zeeshan Syed and Collin Stultz and Manolis Kellis and
                  Piotr Indyk and John Guttag",
  title =        "Motif discovery in physiological datasets: a
                  methodology for inferring predictive elements",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644875",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In this article, we propose a methodology for
                  identifying predictive physiological patterns in the
                  absence of prior knowledge. We use the principle of
                  conservation to identify activity that consistently
                  precedes an outcome in patients, and describe a
                  two-stage process that allows us to efficiently search
                  for such patterns in large datasets. This involves
                  first transforming continuous physiological signals
                  from patients into symbolic sequences, and then
                  searching for patterns in these reduced representations
                  that are strongly associated with an outcome.\par
                  Our strategy of identifying conserved activity that is
                  unlikely to have occurred purely by chance in symbolic
                  data is analogous to the discovery of regulatory motifs
                  in genomic datasets. We build upon existing work in
                  this area, generalizing the notion of a regulatory
                  motif and enhancing current techniques to operate
                  robustly on non-genomic data. We also address two
                  significant considerations associated with motif
                  discovery in general: computational efficiency and
                  robustness in the presence of degeneracy and noise. To
                  deal with these issues, we introduce the concept of
                  active regions and new subset-based techniques such as
                  a two-layer Gibbs sampling algorithm. These extensions
                  allow for a framework for information inference, where
                  precursors are identified as approximately conserved
                  activity of arbitrary complexity preceding multiple
                  occurrences of an event.\par
                  We evaluated our solution on a population of patients
                  who experienced sudden cardiac death and attempted to
                  discover electrocardiographic activity that may be
                  associated with the endpoint of death. To assess the
                  predictive patterns discovered, we compared likelihood
                  scores for motifs in the sudden death population
                  against control populations of normal individuals and
                  those with non-fatal supraventricular arrhythmias. Our
                  results suggest that predictive motif discovery may be
                  able to identify clinically relevant information even
                  in the absence of significant prior knowledge.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "data mining; Gibbs sampling; inference; knowledge
                  discovery; motifs; physiological signals",
}
@Article{Webb:2010:SSI,
  author =       "Geoffrey I. Webb",
  title =        "Self-sufficient itemsets: an approach to screening
                  potentially interesting associations between items",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644876",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Self-sufficient itemsets are those whose frequency
                  cannot be explained solely by the frequency of either
                  their subsets or of their supersets. We argue that
                  itemsets that are not self-sufficient will often be of
                  little interest to the data analyst, as their frequency
                  should be expected once that of the itemsets on which
                  their frequency depends is known. We present tests for
                  statistically sound discovery of self-sufficient
                  itemsets, and computational techniques that allow those
                  tests to be applied as a post-processing step for any
                  itemset discovery algorithm. We also present a measure
                  for assessing the degree of potential interest in an
                  itemset that complements these statistical measures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Association discovery; association rules; itemset
                  discovery; itemset screening; statistical evaluation",
}
@Article{Plantevit:2010:MMM,
  author =       "Marc Plantevit and Anne Laurent and Dominique Laurent
                  and Maguelonne Teisseire and Yeow Wei Choong",
  title =        "Mining multidimensional and multilevel sequential
                  patterns",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644877",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multidimensional databases have been designed to
                  provide decision makers with the necessary tools to
                  help them understand their data. This framework is
                  different from transactional data as the datasets
                  contain huge volumes of historicized and aggregated
                  data defined over a set of dimensions that can be
                  arranged through multiple levels of granularities. Many
                  tools have been proposed to query the data and navigate
                  through the levels of granularity. However, automatic
                  tools are still missing to mine this type of data in
                  order to discover regular specific patterns. In this
                  article, we present a method for mining sequential
                  patterns from multidimensional databases, at the same
                  time taking advantage of the different dimensions and
                  levels of granularity, which is original compared to
                  existing work. The necessary definitions and algorithms
                  are extended from regular sequential patterns to this
                  particular case. Experiments are reported, showing the
                  significance of this approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "frequent patterns; hierarchy; multidimensional
                  databases; multilevel patterns; Sequential patterns",
}
@Article{Zaki:2010:VVO,
author = "Mohammed J. Zaki and Christopher D. Carothers and
Boleslaw K. Szymanski",
title = "{VOGUE}: a variable order hidden {Markov} model with
duration based on frequent sequence mining",
journal = j-TKDD,
volume = "4",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1644873.1644878",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 16 18:37:37 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We present VOGUE, a novel, variable order hidden
Markov model with state durations, that combines two
separate techniques for modeling complex patterns in
sequential data: pattern mining and data modeling.
VOGUE relies on a variable gap sequence mining method
to extract frequent patterns with different lengths and
gaps between elements. It then uses these mined
sequences to build a variable order hidden Markov model
(HMM), that explicitly models the gaps. The gaps
implicitly model the order of the HMM, and they
explicitly model the duration of each state. We apply
VOGUE to a variety of real sequence data taken from
domains such as protein sequence classification, Web
usage logs, intrusion detection, and spelling
correction. We show that VOGUE has superior
classification accuracy compared to regular HMMs,
higher-order HMMs, and even special purpose HMMs like
HMMER, which is a state-of-the-art method for protein
classification. The VOGUE implementation and the
datasets used in this article are available as
open-source.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "Hidden Markov models; higher-order HMM; HMM with
duration; sequence mining and modeling; variable-order
HMM",
}
@Article{Vadera:2010:CCS,
author = "Sunil Vadera",
title = "{CSNL}: a cost-sensitive non-linear decision tree
algorithm",
journal = j-TKDD,
volume = "4",
number = "2",
pages = "6:1--6:??",
month = may,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1754428.1754429",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 14 17:12:30 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "This article presents a new decision tree learning
algorithm called CSNL that induces Cost-Sensitive
Non-Linear decision trees. The algorithm is based on
the hypothesis that nonlinear decision nodes provide a
better basis than axis-parallel decision nodes and
utilizes discriminant analysis to construct nonlinear
decision trees that take account of costs of
misclassification.\par
The performance of the algorithm is evaluated by
applying it to seventeen datasets and the results are
compared with those obtained by two well known
cost-sensitive algorithms, ICET and MetaCost, which
generate multiple trees to obtain some of the best
results to date. The results show that CSNL performs at
least as well, if not better than these algorithms, in
more than twelve of the datasets and is considerably
faster. The use of bagging with CSNL further enhances
its performance showing the significant benefits of
using nonlinear decision nodes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "cost-sensitive learning; Decision tree learning",
}
@Article{Kandylas:2010:AKC,
author = "Vasileios Kandylas and S. Phineas Upham and Lyle H.
Ungar",
title = "Analyzing knowledge communities using foreground and
background clusters",
journal = j-TKDD,
volume = "4",
number = "2",
pages = "7:1--7:??",
month = may,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1754428.1754430",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 14 17:12:30 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Insight into the growth (or shrinkage) of ``knowledge
communities'' of authors that build on each other's
work can be gained by studying the evolution over time
of clusters of documents. We cluster documents based on
the documents they cite in common using the Streemer
clustering method, which finds cohesive foreground
clusters (the knowledge communities) embedded in a
diffuse background. We build predictive models with
features based on the citation structure, the
vocabulary of the papers, and the affiliations and
prestige of the authors and use these models to study
the drivers of community growth and the predictors of
how widely a paper will be cited. We find that
scientific knowledge communities tend to grow more
rapidly if their publications build on diverse
information and use narrow vocabulary and that papers
that lie on the periphery of a community have the
highest impact, while those not in any community have
the lowest impact.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "citation analysis; clustering; community evolution;
knowledge communities; Text mining",
}
@Article{Ji:2010:SSL,
author = "Shuiwang Ji and Lei Tang and Shipeng Yu and Jieping
Ye",
title = "A shared-subspace learning framework for multi-label
classification",
journal = j-TKDD,
volume = "4",
number = "2",
pages = "8:1--8:??",
month = may,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1754428.1754431",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 14 17:12:30 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Multi-label problems arise in various domains such as
multi-topic document categorization, protein function
prediction, and automatic image annotation. One natural
way to deal with such problems is to construct a binary
classifier for each label, resulting in a set of
independent binary classification problems. Since
multiple labels share the same input space, and the
semantics conveyed by different labels are usually
correlated, it is essential to exploit the correlation
information contained in different labels. In this
paper, we consider a general framework for extracting
shared structures in multi-label classification. In
this framework, a common subspace is assumed to be
shared among multiple labels. We show that the optimal
solution to the proposed formulation can be obtained by
solving a generalized eigenvalue problem, though the
problem is nonconvex. For high-dimensional problems,
direct computation of the solution is expensive, and we
develop an efficient algorithm for this case. One
appealing feature of the proposed framework is that it
includes several well-known algorithms as special
cases, thus elucidating their intrinsic relationships.
We further show that the proposed framework can be
extended to the kernel-induced feature space. We have
conducted extensive experiments on multi-topic web page
categorization and automatic gene expression pattern
image annotation tasks, and results demonstrate the
effectiveness of the proposed formulation in comparison
with several representative algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "gene expression pattern image annotation; kernel
methods; least squares loss; Multi-label
classification; shared subspace; singular value
decomposition; web page categorization",
}
@Article{Ruggieri:2010:DMD,
author = "Salvatore Ruggieri and Dino Pedreschi and Franco
Turini",
title = "Data mining for discrimination discovery",
journal = j-TKDD,
volume = "4",
number = "2",
pages = "9:1--9:??",
month = may,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1754428.1754432",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 14 17:12:30 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In the context of civil rights law, discrimination
refers to unfair or unequal treatment of people based
on membership to a category or a minority, without
regard to individual merit. Discrimination in credit,
mortgage, insurance, labor market, and education has
been investigated by researchers in economics and human
sciences. With the advent of automatic decision support
systems, such as credit scoring systems, the ease of
data collection opens several challenges to data
analysts for the fight against discrimination. In this
article, we introduce the problem of discovering
discrimination through data mining in a dataset of
historical decision records, taken by humans or by
automatic systems. We formalize the processes of direct
and indirect discrimination discovery by modelling
protected-by-law groups and contexts where
discrimination occurs in a classification rule based
syntax. Basically, classification rules extracted from
the dataset allow for unveiling contexts of unlawful
discrimination, where the degree of burden over
protected-by-law groups is formalized by an extension
of the lift measure of a classification rule. In direct
discrimination, the extracted rules can be directly
mined in search of discriminatory contexts. In indirect
discrimination, the mining process needs some
background knowledge as a further input, for example,
census data, that combined with the extracted rules
might allow for unveiling contexts of discriminatory
decisions. A strategy adopted for combining extracted
classification rules with background knowledge is
called an inference model. In this article, we propose
two inference models and provide automatic procedures
for their implementation. An empirical assessment of
our results is provided on the German credit dataset
and on the PKDD Discovery Challenge 1999 financial
dataset.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
keywords = "classification rules; Discrimination",
}
@Article{Thomas:2010:MMF,
author = "Lini T. Thomas and Satyanarayana R. Valluri and
Kamalakar Karlapalem",
title = "{MARGIN}: {Maximal} frequent subgraph mining",
journal = j-TKDD,
volume = "4",
number = "3",
pages = "10:1--10:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1839490.1839491",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:57 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Deodhar:2010:SFS,
author = "Meghana Deodhar and Joydeep Ghosh",
title = "{SCOAL}: a framework for simultaneous co-clustering
and learning from complex data",
journal = j-TKDD,
volume = "4",
number = "3",
pages = "11:1--11:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1839490.1839492",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:57 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2010:BBI,
author = "Jinlin Chen and Keli Xiao",
title = "{BISC}: a bitmap itemset support counting approach for
efficient frequent itemset mining",
journal = j-TKDD,
volume = "4",
number = "3",
pages = "12:1--12:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1839490.1839493",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:57 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Becchetti:2010:EAL,
author = "Luca Becchetti and Paolo Boldi and Carlos Castillo and
Aristides Gionis",
title = "Efficient algorithms for large-scale local triangle
counting",
journal = j-TKDD,
volume = "4",
number = "3",
pages = "13:1--13:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1839490.1839494",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:57 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2010:MDR,
author = "Yin Zhang and Zhi-Hua Zhou",
title = "Multilabel dimensionality reduction via dependence
maximization",
journal = j-TKDD,
volume = "4",
number = "3",
pages = "14:1--14:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1839490.1839495",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:57 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cui:2010:LMN,
author = "Ying Cui and Xiaoli Z. Fern and Jennifer G. Dy",
title = "Learning multiple nonredundant clusterings",
journal = j-TKDD,
volume = "4",
number = "3",
pages = "15:1--15:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1839490.1839496",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:57 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2010:TSI,
author = "Wei Wang",
title = "{TKDD} Special Issue: {SIGKDD 2009}",
journal = j-TKDD,
volume = "4",
number = "4",
pages = "16:1--16:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1857947.1857948",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:58 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2010:BTA,
author = "Ye Chen and Dmitry Pavlov and John F. Canny",
title = "Behavioral Targeting: The Art of Scaling Up Simple
Algorithms",
journal = j-TKDD,
volume = "4",
number = "4",
pages = "17:1--17:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1857947.1857949",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:58 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "17",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Mohammed:2010:CDA,
author = "Noman Mohammed and Benjamin C. M. Fung and Patrick C.
K. Hung and Cheuk-Kwong Lee",
title = "Centralized and Distributed Anonymization for
High-Dimensional Healthcare Data",
journal = j-TKDD,
volume = "4",
number = "4",
pages = "18:1--18:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1857947.1857950",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:58 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "18",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2010:BBM,
author = "Chao Liu and Fan Guo and Christos Faloutsos",
title = "{Bayesian} Browsing Model: Exact Inference of Document
Relevance from Petabyte-Scale Data",
journal = j-TKDD,
volume = "4",
number = "4",
pages = "19:1--19:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1857947.1857951",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:58 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "19",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2010:MAF,
author = "Mingxi Wu and Chris Jermaine and Sanjay Ranka and
Xiuyao Song and John Gums",
title = "A Model-Agnostic Framework for Fast Spatial Anomaly
Detection",
journal = j-TKDD,
volume = "4",
number = "4",
pages = "20:1--20:??",
month = oct,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1857947.1857952",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:58 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "20",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhong:2010:ATS,
author = "Ning Zhong and Gregory Piatetsky-Shapiro and Yiyu Yao
and Philip S. Yu",
title = "{ACM TKDD} Special Issue on Knowledge Discovery for
{Web} Intelligence",
journal = j-TKDD,
volume = "5",
number = "1",
pages = "1:1--1:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1870096.1870097",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:59 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tang:2010:CAW,
author = "Jie Tang and Limin Yao and Duo Zhang and Jing Zhang",
title = "A Combination Approach to {Web} User Profiling",
journal = j-TKDD,
volume = "5",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1870096.1870098",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:59 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bouguessa:2010:DKS,
author = "Mohamed Bouguessa and Shengrui Wang and Benoit
Dumoulin",
title = "Discovering Knowledge-Sharing Communities in
Question-Answering Forums",
journal = j-TKDD,
volume = "5",
number = "1",
pages = "3:1--3:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1870096.1870099",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:59 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Plangprasopchok:2010:MSA,
author = "Anon Plangprasopchok and Kristina Lerman",
title = "Modeling Social Annotation: a {Bayesian} Approach",
journal = j-TKDD,
volume = "5",
number = "1",
pages = "4:1--4:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1870096.1870100",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:59 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sakurai:2010:FDG,
author = "Yasushi Sakurai and Christos Faloutsos and Spiros
Papadimitriou",
title = "Fast Discovery of Group Lag Correlations in Streams",
journal = j-TKDD,
volume = "5",
number = "1",
pages = "5:1--5:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1870096.1870101",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:59 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2010:FCP,
author = "Kun Liu and Evimaria Terzi",
title = "A Framework for Computing the Privacy Scores of Users
in Online Social Networks",
journal = j-TKDD,
volume = "5",
number = "1",
pages = "6:1--6:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1870096.1870102",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:43:59 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2011:ISI,
author = "Jimeng Sun and Yan Liu and Jie Tang and Chid Apte",
title = "Introduction to Special Issue on Large-Scale Data
Mining",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "7:1--7:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921633",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Kang:2011:HMR,
author = "U Kang and Charalampos E. Tsourakakis and Ana Paula
Appel and Christos Faloutsos and Jure Leskovec",
title = "{HADI}: Mining Radii of Large Graphs",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "8:1--8:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921634",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{deVries:2011:RRL,
author = "Timothy de Vries and Hui Ke and Sanjay Chawla and
Peter Christen",
title = "Robust Record Linkage Blocking Using Suffix Arrays and
{Bloom} Filters",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "9:1--9:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921635",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Dunlavy:2011:TLP,
author = "Daniel M. Dunlavy and Tamara G. Kolda and Evrim Acar",
title = "Temporal Link Prediction Using Matrix and Tensor
Factorizations",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "10:1--10:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921636",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Magdalinos:2011:ECQ,
author = "Panagis Magdalinos and Christos Doulkeridis and
Michalis Vazirgiannis",
title = "Enhancing Clustering Quality through Landmark-Based
Dimensionality Reduction",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "11:1--11:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921637",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cheng:2011:CLA,
author = "Hong Cheng and Yang Zhou and Jeffrey Xu Yu",
title = "Clustering Large Attributed Graphs: a Balance between
Structural and Attribute Similarities",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "12:1--12:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921638",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Menon:2011:FAA,
author = "Aditya Krishna Menon and Charles Elkan",
title = "Fast Algorithms for Approximating the Singular Value
Decomposition",
journal = j-TKDD,
volume = "5",
number = "2",
pages = "13:1--13:??",
month = feb,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1921632.1921639",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Mar 28 11:44:01 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "A low-rank approximation to a matrix $A$ is a matrix
with significantly smaller rank than $A$, and which is
close to $A$ according to some norm. Many practical
applications involving the use of large matrices focus
on low-rank approximations. By reducing the rank or
dimensionality of the data, we reduce the complexity of
analyzing the data. The singular value decomposition is
the most popular low-rank matrix approximation.
However, due to its expensive computational
requirements, it has often been considered intractable
for practical applications involving massive data.
Recent developments have tried to address this problem,
with several methods proposed to approximate the
decomposition with better asymptotic runtime. We
present an empirical study of these techniques on a
variety of dense and sparse datasets. We find that a
sampling approach of Drineas, Kannan and Mahoney is
often, but not always, the best performing method. This
method gives solutions with high accuracy much faster
than classical SVD algorithms, on large sparse datasets
in particular. Other modern methods, such as a recent
algorithm by Rokhlin and Tygert, also offer savings
compared to classical SVD algorithms. The older
sampling methods of Achlioptas and McSherry are shown
to sometimes take longer than classical SVD.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2011:IDC,
author = "Dingding Wang and Shenghuo Zhu and Tao Li and Yun Chi
and Yihong Gong",
title = "Integrating Document Clustering and Multidocument
Summarization",
journal = j-TKDD,
volume = "5",
number = "3",
pages = "14:1--14:??",
month = aug,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1993077.1993078",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Aug 18 13:28:08 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Maier:2011:INS,
author = "Marc Maier and Matthew Rattigan and David Jensen",
title = "Indexing Network Structure with Shortest-Path Trees",
journal = j-TKDD,
volume = "5",
number = "3",
pages = "15:1--15:??",
month = aug,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1993077.1993079",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Aug 18 13:28:08 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wong:2011:CUA,
author = "Raymond Chi-Wing Wong and Ada Wai-Chee Fu and Ke Wang
and Philip S. Yu and Jian Pei",
title = "Can the Utility of Anonymized Data be Used for Privacy
Breaches?",
journal = j-TKDD,
volume = "5",
number = "3",
pages = "16:1--16:??",
month = aug,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1993077.1993080",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Aug 18 13:28:08 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2011:CDM,
  author          = "Yu-Ru Lin and Jimeng Sun and Hari Sundaram and Aisling
                     Kelliher and Paul Castro and Ravi Konuru",
  title           = "Community Discovery via Metagraph Factorization",
  journal         = j-TKDD,
  volume          = "5",
  number          = "3",
  pages           = "17:1--17:??",
  month           = aug,
  year            = "2011",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1993077.1993081",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Aug 18 13:28:08 MDT 2011",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "17",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Elkan:2012:GES,
  author          = "Charles Elkan and Yehuda Koren",
  title           = "Guest Editorial for Special Issue {KDD'10}",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "18:1--18:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086738",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "18",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Iwata:2012:SMT,
  author          = "Tomoharu Iwata and Takeshi Yamada and Yasushi Sakurai
                     and Naonori Ueda",
  title           = "Sequential Modeling of Topic Dynamics with Multiple
                     Timescales",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "19:1--19:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086739",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "We propose an online topic model for sequentially
                     analyzing the time evolution of topics in document
                     collections. Topics naturally evolve with multiple
                     timescales. For example, some words may be used
                     consistently over one hundred years, while other words
                     emerge and disappear over periods of a few days. Thus,
                     in the proposed model, current topic-specific
                     distributions over words are assumed to be generated
                     based on the multiscale word distributions of the
                     previous epoch. Considering both the long- and
                     short-timescale dependency yields a more robust model.
                     We derive efficient online inference procedures based
                     on a stochastic EM algorithm, in which the model is
                     sequentially updated using newly obtained data; this
                     means that past data are not required to make the
                     inference. We demonstrate the effectiveness of the
                     proposed method in terms of predictive performance and
                     computational efficiency by examining collections of
                     real documents with timestamps.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "19",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Huh:2012:DTM,
  author          = "Seungil Huh and Stephen E. Fienberg",
  title           = "Discriminative Topic Modeling Based on Manifold
                     Learning",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "20:1--20:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086740",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Topic modeling has become a popular method used for
                     data analysis in various domains including text
                     documents. Previous topic model approaches, such as
                     probabilistic Latent Semantic Analysis (pLSA) and
                     Latent Dirichlet Allocation (LDA), have shown
                     impressive success in discovering low-rank hidden
                     structures for modeling text documents. These
                     approaches, however do not take into account the
                     manifold structure of the data, which is generally
                     informative for nonlinear dimensionality reduction
                     mapping. More recent topic model approaches, Laplacian
                     PLSI (LapPLSI) and Locally-consistent Topic Model
                     (LTM), have incorporated the local manifold structure
                     into topic models and have shown resulting benefits.
                     But they fall short of achieving full discriminating
                     power of manifold learning as they only enhance the
                     proximity between the low-rank representations of
                     neighboring pairs without any consideration for
                     non-neighboring pairs. In this article, we propose a
                     new approach, Discriminative Topic Model (DTM), which
                     separates non-neighboring pairs from each other in
                     addition to bringing neighboring pairs closer together,
                     thereby preserving the global manifold structure as
                     well as improving local consistency. We also present a
                     novel model-fitting algorithm based on the generalized
                     EM algorithm and the concept of Pareto improvement. We
                     empirically demonstrate the success of DTM in terms of
                     unsupervised clustering and semisupervised
                     classification accuracies on text corpora and
                     robustness to parameters compared to state-of-the-art
                     techniques.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "20",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Gomez-Rodriguez:2012:IND,
  author          = "Manuel Gomez-Rodriguez and Jure Leskovec and Andreas
                     Krause",
  title           = "Inferring Networks of Diffusion and Influence",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "21:1--21:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086741",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Information diffusion and virus propagation are
                     fundamental processes taking place in networks. While
                     it is often possible to directly observe when nodes
                     become infected with a virus or publish the
                     information, observing individual transmissions (who
                     infects whom, or who influences whom) is typically very
                     difficult. Furthermore, in many applications, the
                     underlying network over which the diffusions and
                     propagations spread is actually unobserved. We tackle
                     these challenges by developing a method for tracing
                     paths of diffusion and influence through networks and
                     inferring the networks over which contagions propagate.
                     Given the times when nodes adopt pieces of information
                     or become infected, we identify the optimal network
                     that best explains the observed infection times. Since
                     the optimization problem is NP-hard to solve exactly,
                     we develop an efficient approximation algorithm that
                     scales to large datasets and finds provably
                     near-optimal networks. We demonstrate the effectiveness
                     of our approach by tracing information diffusion in a
                     set of 170 million blogs and news articles over a one
                     year period to infer how information flows through the
                     online media space. We find that the diffusion network
                     of news for the top 1,000 media sites and blogs tends
                     to have a core-periphery structure with a small set of
                     core media sites that diffuse information to the rest
                     of the Web. These sites tend to have stable circles of
                     influence with more general news media sites acting as
                     connectors between them.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "21",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2012:LIS,
  author          = "Jianhui Chen and Ji Liu and Jieping Ye",
  title           = "Learning Incoherent Sparse and Low-Rank Patterns from
                     Multiple Tasks",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "22:1--22:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086742",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "We consider the problem of learning incoherent sparse
                     and low-rank patterns from multiple tasks. Our approach
                     is based on a linear multitask learning formulation, in
                     which the sparse and low-rank patterns are induced by a
                     cardinality regularization term and a low-rank
                     constraint, respectively. This formulation is
                     nonconvex; we convert it into its convex surrogate,
                     which can be routinely solved via semidefinite
                     programming for small-size problems. We propose
                     employing the general projected gradient scheme to
                     efficiently solve such a convex surrogate; however, in
                     the optimization formulation, the objective function is
                     nondifferentiable and the feasible domain is
                     nontrivial. We present the procedures for computing the
                     projected gradient and ensuring the global convergence
                     of the projected gradient scheme. The computation of
                     the projected gradient involves a constrained
                     optimization problem; we show that the optimal solution
                     to such a problem can be obtained via solving an
                     unconstrained optimization subproblem and a Euclidean
                     projection subproblem. We also present two projected
                     gradient algorithms and analyze their rates of
                     convergence in detail. In addition, we illustrate the
                     use of the presented projected gradient algorithms for
                     the proposed multitask learning formulation using the
                     least squares loss. Experimental results on a
                     collection of real-world data sets demonstrate the
                     effectiveness of the proposed multitask learning
                     formulation and the efficiency of the proposed
                     projected gradient algorithms.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "22",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2012:LLC,
  author          = "Hsiang-Fu Yu and Cho-Jui Hsieh and Kai-Wei Chang and
                     Chih-Jen Lin",
  title           = "Large Linear Classification When Data Cannot Fit in
                     Memory",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "23:1--23:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086743",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Recent advances in linear classification have shown
                     that for applications such as document classification,
                     the training process can be extremely efficient.
                     However, most of the existing training methods are
                     designed by assuming that data can be stored in the
                     computer memory. These methods cannot be easily applied
                     to data larger than the memory capacity due to the
                     random access to the disk. We propose and analyze a
                     block minimization framework for data larger than the
                     memory size. At each step a block of data is loaded
                     from the disk and handled by certain learning methods.
                     We investigate two implementations of the proposed
                     framework for primal and dual SVMs, respectively.
                     Because data cannot fit in memory, many design
                     considerations are very different from those for
                     traditional algorithms. We discuss and compare with
                     existing approaches that are able to handle data larger
                     than memory. Experiments using data sets 20 times
                     larger than the memory demonstrate the effectiveness of
                     the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "23",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Shahaf:2012:CTL,
  author          = "Dafna Shahaf and Carlos Guestrin",
  title           = "Connecting Two (or Less) Dots: Discovering Structure
                     in News Articles",
  journal         = j-TKDD,
  volume          = "5",
  number          = "4",
  pages           = "24:1--24:??",
  month           = feb,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2086737.2086744",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 16 15:19:57 MDT 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Finding information is becoming a major part of our
                     daily life. Entire sectors, from Web users to
                     scientists and intelligence analysts, are increasingly
                     struggling to keep up with the larger and larger
                     amounts of content published every day. With this much
                     data, it is often easy to miss the big picture. In this
                     article, we investigate methods for automatically
                     connecting the dots---providing a structured, easy way
                     to navigate within a new topic and discover hidden
                     connections. We focus on the news domain: given two
                     news articles, our system automatically finds a
                     coherent chain linking them together. For example, it
                     can recover the chain of events starting with the
                     decline of home prices (January 2007), and ending with
                     the health care debate (2009). We formalize the
                     characteristics of a good chain and provide a fast
                     search-driven algorithm to connect two fixed endpoints.
                     We incorporate user feedback into our framework,
                     allowing the stories to be refined and personalized. We
                     also provide a method to handle partially-specified
                     endpoints, for users who do not know both ends of a
                     story. Finally, we evaluate our algorithm over real
                     news data. Our user studies demonstrate that the
                     objective we propose captures the users' intuitive
                     notion of coherence, and that our algorithm effectively
                     helps users understand the news.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "24",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Ienco:2012:CDL,
  author          = "Dino Ienco and Ruggero G. Pensa and Rosa Meo",
  title           = "From Context to Distance: Learning Dissimilarity for
                     Categorical Data Clustering",
  journal         = j-TKDD,
  volume          = "6",
  number          = "1",
  pages           = "1:1--1:??",
  month           = mar,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2133360.2133361",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Clustering data described by categorical attributes is
                     a challenging task in data mining applications. Unlike
                     numerical attributes, it is difficult to define a
                     distance between pairs of values of a categorical
                     attribute, since the values are not ordered. In this
                     article, we propose a framework to learn a
                     context-based distance for categorical attributes. The
                     key intuition of this work is that the distance between
                     two values of a categorical attribute $A_i$ can be
                     determined by the way in which the values of the other
                     attributes $A_j$ are distributed in the dataset
                     objects: if they are similarly distributed in the
                     groups of objects in correspondence of the distinct
                     values of $A_i$ a low value of distance is obtained. We
                     propose also a solution to the critical point of the
                     choice of the attributes $A_j$. We validate our
                     approach by embedding our distance learning framework
                     in a hierarchical clustering algorithm. We applied it
                     on various real world and synthetic datasets, both low
                     and high-dimensional. Experimental results show that
                     our method is competitive with respect to the state of
                     the art of categorical data clustering approaches. We
                     also show that our approach is scalable and has a low
                     impact on the overall computational time of a
                     clustering task.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "1",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2012:EMG,
  author          = "Chun Li and Qingyan Yang and Jianyong Wang and Ming
                     Li",
  title           = "Efficient Mining of Gap-Constrained Subsequences and
                     Its Various Applications",
  journal         = j-TKDD,
  volume          = "6",
  number          = "1",
  pages           = "2:1--2:??",
  month           = mar,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2133360.2133362",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Mining frequent subsequence patterns is a typical
                     data-mining problem and various efficient sequential
                     pattern mining algorithms have been proposed. In many
                     application domains (e.g., biology), the frequent
                     subsequences confined by the predefined gap
                     requirements are more meaningful than the general
                     sequential patterns. In this article, we propose two
                     algorithms, Gap-BIDE for mining closed gap-constrained
                     subsequences from a set of input sequences, and
                     Gap-Connect for mining repetitive gap-constrained
                     subsequences from a single input sequence. Inspired by
                     some state-of-the-art closed or constrained sequential
                     pattern mining algorithms, the Gap-BIDE algorithm
                     adopts an efficient approach to finding the complete
                     set of closed sequential patterns with gap constraints,
                     while the Gap-Connect algorithm efficiently mines an
                     approximate set of long patterns by connecting short
                     patterns. We also present several methods for feature
                     selection from the set of gap-constrained patterns for
                     the purpose of classification and clustering. Our
                     extensive performance study shows that our approaches
                     are very efficient in mining frequent subsequences with
                     gap constraints, and the gap-constrained pattern based
                     classification/clustering approaches can achieve
                     high-quality results.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "2",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2012:IBA,
  author          = "Fei Tony Liu and Kai Ming Ting and Zhi-Hua Zhou",
  title           = "Isolation-Based Anomaly Detection",
  journal         = j-TKDD,
  volume          = "6",
  number          = "1",
  pages           = "3:1--3:??",
  month           = mar,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2133360.2133363",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Anomalies are data points that are few and different.
                     As a result of these properties, we show that,
                     anomalies are susceptible to a mechanism called
                     isolation. This article proposes a method called
                     Isolation Forest ($i$Forest), which detects anomalies
                     purely based on the concept of isolation without
                     employing any distance or density
                     measure---fundamentally different from all existing
                     methods. As a result, $i$Forest is able to exploit
                     subsampling (i) to achieve a low linear time-complexity
                     and a small memory-requirement and (ii) to deal with
                     the effects of swamping and masking effectively. Our
                     empirical evaluation shows that $i$Forest outperforms
                     ORCA, one-class SVM, LOF and Random Forests in terms of
                     AUC, processing time, and it is robust against masking
                     and swamping effects. $i$Forest also works well in
                     high dimensional problems containing a large number of
                     irrelevant attributes, and when anomalies are not
                     available in training sample.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "3",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Jin:2012:MML,
  author          = "Yu Jin and Nick Duffield and Jeffrey Erman and Patrick
                     Haffner and Subhabrata Sen and Zhi-Li Zhang",
  title           = "A Modular Machine Learning System for Flow-Level
                     Traffic Classification in Large Networks",
  journal         = j-TKDD,
  volume          = "6",
  number          = "1",
  pages           = "4:1--4:??",
  month           = mar,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2133360.2133364",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "The ability to accurately and scalably classify
                     network traffic is of critical importance to a wide
                     range of management tasks of large networks, such as
                     tier-1 ISP networks and global enterprise networks.
                     Guided by the practical constraints and requirements of
                     traffic classification in large networks, in this
                     article, we explore the design of an accurate and
                     scalable machine learning based flow-level traffic
                     classification system, which is trained on a dataset of
                     flow-level data that has been annotated with
                     application protocol labels by a packet-level
                     classifier. Our system employs a lightweight modular
                     architecture, which combines a series of simple linear
                     binary classifiers, each of which can be efficiently
                     implemented and trained on vast amounts of flow data in
                     parallel, and embraces three key innovative mechanisms,
                     weighted threshold sampling, logistic calibration, and
                     intelligent data partitioning, to achieve scalability
                     while attaining high accuracy. Evaluations using real
                     traffic data from multiple locations in a large ISP
                     show that our system accurately reproduces the labels
                     of the packet level classifier when runs on (unlabeled)
                     flow records, while meeting the scalability and
                     stability requirements of large ISP networks. Using
                     training and test datasets that are two months apart
                     and collected from two different locations, the flow
                     error rates are only 3\% for TCP flows and 0.4\% for
                     UDP flows. We further show that such error rates can be
                     reduced by combining the information of spatial
                     distributions of flows, or collective traffic
                     statistics, during classification. We propose a novel
                     two-step model, which seamlessly integrates these
                     collective traffic statistics into the existing traffic
                     classification system. Experimental results display
                     performance improvement on all traffic classes and an
                     overall error rate reduction by 15\%. In addition to a
                     high accuracy, at runtime, our implementation easily
                     scales to classify traffic on 10Gbps links.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "4",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Mavroeidis:2012:SSF,
  author          = "Dimitrios Mavroeidis and Panagis Magdalinos",
  title           = "A Sequential Sampling Framework for Spectral $k$-Means
                     Based on Efficient Bootstrap Accuracy Estimations:
                     Application to Distributed Clustering",
  journal         = j-TKDD,
  volume          = "6",
  number          = "2",
  pages           = "5:1--5:??",
  month           = jul,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2297456.2297457",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "The scalability of learning algorithms has always been
                     a central concern for data mining researchers, and
                     nowadays, with the rapid increase in data storage
                     capacities and availability, its importance has
                     increased. To this end, sampling has been studied by
                     several researchers in an effort to derive sufficiently
                     accurate models using only small data fractions. In
                     this article we focus on spectral $k$-means, that is,
                     the $k$-means approximation as derived by the spectral
                     relaxation, and propose a sequential sampling framework
                     that iteratively enlarges the sample size until the
                     $k$-means results (objective function and cluster
                     structure) become indistinguishable from the asymptotic
                     (infinite-data) output. In the proposed framework we
                     adopt a commonly applied principle in data mining
                     research that considers the use of minimal assumptions
                     concerning the data generating distribution. This
                     restriction imposes several challenges, mainly related
                     to the efficiency of the sequential sampling procedure.
                     These challenges are addressed using elements of matrix
                     perturbation theory and statistics. Moreover, although
                     the main focus is on spectral $k$-means, we also
                     demonstrate that the proposed framework can be
                     generalized to handle spectral clustering. The proposed
                     sequential sampling framework is consecutively employed
                     for addressing the distributed clustering problem,
                     where the task is to construct a global model for data
                     that resides in distributed network nodes. The main
                     challenge in this context is related to the bandwidth
                     constraints that are commonly imposed, thus requiring
                     that the distributed clustering algorithm consumes a
                     minimal amount of network load. This illustrates the
                     applicability of the proposed approach, as it enables
                     the determination of a minimal sample size that can be
                     used for constructing an accurate clustering model that
                     entails the distributional characteristics of the data.
                     As opposed to the relevant distributed $k$-means
                     approaches, our framework takes into account the fact
                     that the choice of the number of clusters has a crucial
                     effect on the required amount of communication. More
                     precisely, the proposed algorithm is able to derive a
                     statistical estimation of the required relative sizes
                     for all possible values of $k$. This unique feature of
                     our distributed clustering framework enables a network
                     administrator to choose an economic solution that
                     identifies the crude cluster structure of a dataset and
                     not devote excessive network resources for identifying
                     all the ``correct'' detailed clusters.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "5",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Das:2012:MIG,
  author          = "Sanmay Das and Malik Magdon-Ismail",
  title           = "A Model for Information Growth in Collective Wisdom
                     Processes",
  journal         = j-TKDD,
  volume          = "6",
  number          = "2",
  pages           = "6:1--6:??",
  month           = jul,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2297456.2297458",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Collaborative media such as wikis have become
                     enormously successful venues for information creation.
                     Articles accrue information through the asynchronous
                     editing of users who arrive both seeking information
                     and possibly able to contribute information. Most
                     articles stabilize to high-quality, trusted sources of
                     information representing the collective wisdom of all
                     the users who edited the article. We propose a model
                     for information growth which relies on two main
                     observations: (i) as an article's quality improves, it
                     attracts visitors at a faster rate (a rich-get-richer
                     phenomenon); and, simultaneously, (ii) the chances that
                     a new visitor will improve the article drops (there is
                     only so much that can be said about a particular
                     topic). Our model is able to reproduce many features of
                     the edit dynamics observed on Wikipedia; in particular,
                     it captures the observed rise in the edit rate,
                     followed by $ 1 / t $ decay. Despite differences in the
                     media, we also document similar features in the comment
                     rates for a segment of the LiveJournal blogosphere.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "6",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2012:GME,
  author          = "Tianbing Xu and Zhongfei Zhang and Philip S. Yu and Bo
                     Long",
  title           = "Generative Models for Evolutionary Clustering",
  journal         = j-TKDD,
  volume          = "6",
  number          = "2",
  pages           = "7:1--7:??",
  month           = jul,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2297456.2297459",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "This article studies evolutionary clustering, a
                     recently emerged hot topic with many important
                     applications, noticeably in dynamic social network
                     analysis. In this article, based on the recent
                     literature on nonparametric Bayesian models, we have
                     developed two generative models: DPChain and HDP-HTM.
                     DPChain is derived from the Dirichlet process mixture
                     (DPM) model, with an exponential decaying component
                     along with the time. HDP-HTM combines the hierarchical
                     Dirichlet process (HDP) with a hierarchical transition
                     matrix (HTM) based on the proposed Infinite
                     hierarchical Markov state model (iHMS). Both models
                     substantially advance the literature on evolutionary
                     clustering, in the sense that not only do they both
                     perform better than those in the existing literature,
                     but more importantly, they are capable of automatically
                     learning the cluster numbers and explicitly addressing
                     the corresponding issues. Extensive evaluations have
                     demonstrated the effectiveness and the promise of these
                     two solutions compared to the state-of-the-art
                     literature.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "7",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2012:LME,
  author          = "Shaojun Wang and Dale Schuurmans and Yunxin Zhao",
  title           = "The Latent Maximum Entropy Principle",
  journal         = j-TKDD,
  volume          = "6",
  number          = "2",
  pages           = "8:1--8:??",
  month           = jul,
  year            = "2012",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2297456.2297460",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Nov 6 18:30:38 MST 2012",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "We present an extension to Jaynes' maximum entropy
                     principle that incorporates latent variables. The
                     principle of latent maximum entropy we propose is
                     different from both Jaynes' maximum entropy principle
                     and maximum likelihood estimation, but can yield better
                     estimates in the presence of hidden variables and
                     limited training data. We first show that solving for a
                     latent maximum entropy model poses a hard nonlinear
                     constrained optimization problem in general. However,
                     we then show that feasible solutions to this problem
                     can be obtained efficiently for the special case of
                     log-linear models---which forms the basis for an
                     efficient approximation to the latent maximum entropy
                     principle. We derive an algorithm that combines
                     expectation-maximization with iterative scaling to
                     produce feasible log-linear solutions. This algorithm
                     can be interpreted as an alternating minimization
                     algorithm in the information divergence, and reveals an
                     intimate connection between the latent maximum entropy
                     and maximum likelihood principles. To select a final
                     model, we generate a series of feasible candidates,
                     calculate the entropy of each, and choose the model
                     that attains the highest entropy. Our experimental
                     results show that estimation based on the latent
                     maximum entropy principle generally gives better
                     results than maximum likelihood when estimating latent
                     variable models on small observed data samples.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "8",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Bhattacharya:2012:CGC,
  author =       "Indrajit Bhattacharya and Shantanu Godbole and
                 Sachindra Joshi and Ashish Verma",
  title =        "Cross-Guided Clustering: Transfer of Relevant
                 Supervision across Tasks",
  journal =      j-TKDD,
  volume =       "6",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2297456.2297461",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Lack of supervision in clustering algorithms often
                 leads to clusters that are not useful or interesting to
                 human reviewers. We investigate if supervision can be
                 automatically transferred for clustering a target task,
                 by providing a relevant supervised partitioning of a
                 dataset from a different source task. The target
                 clustering is made more meaningful for the human user
                 by trading-off intrinsic clustering goodness on the
                 target task for alignment with relevant supervised
                 partitions in the source task, wherever possible. We
                 propose a cross-guided clustering algorithm that builds
                 on traditional $k$-means by aligning the target clusters
                 with source partitions. The alignment process makes use
                 of a cross-task similarity measure that discovers
                 hidden relationships across tasks. When the source and
                 target tasks correspond to different domains with
                 potentially different vocabularies, we propose a
                 projection approach using pivot vocabularies for the
                 cross-domain similarity measure. Using multiple
                 real-world and synthetic datasets, we show that our
                 approach improves clustering accuracy significantly
                 over traditional $k$-means and state-of-the-art
                 semi-supervised clustering baselines, over a wide range
                 of data characteristics and parameter settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2012:LBN,
  author =       "Zhenxing Wang and Laiwan Chan",
  title =        "Learning {Bayesian} networks from {Markov} random
                 fields: an efficient algorithm for linear models",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362384",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Dependency analysis is a typical approach for Bayesian
                 network learning, which infers the structures of
                 Bayesian networks by the results of a series of
                 conditional independence (CI) tests. In practice,
                 testing independence conditioning on large sets hampers
                 the performance of dependency analysis algorithms in
                 terms of accuracy and running time for the following
                 reasons. First, testing independence on large sets of
                 variables with limited samples is not stable. Second,
                 for most dependency analysis algorithms, the number of
                 CI tests grows at an exponential rate with the sizes of
                 conditioning sets, and the running time grows of the
                 same rate. Therefore, determining how to reduce the
                 number of CI tests and the sizes of conditioning sets
                 becomes a critical step in dependency analysis
                 algorithms. In this article, we address a two-phase
                 algorithm based on the observation that the structures
                 of Markov random fields are similar to those of
                 Bayesian networks. The first phase of the algorithm
                 constructs a Markov random field from data, which
                 provides a close approximation to the structure of the
                 true Bayesian network; the second phase of the
                 algorithm removes redundant edges according to CI tests
                 to get the true Bayesian network. Both phases use
                 Markov blanket information to reduce the sizes of
                 conditioning sets and the number of CI tests without
                 sacrificing accuracy. An empirical study shows that the
                 two-phase algorithm performs well in terms of accuracy
                 and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chan:2012:CID,
  author =       "Jeffrey Chan and James Bailey and Christopher Leckie
                 and Michael Houle",
  title =        "{ciForager}: Incrementally discovering regions of
                 correlated change in evolving graphs",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362385",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Data mining techniques for understanding how graphs
                 evolve over time have become increasingly important.
                 Evolving graphs arise naturally in diverse applications
                 such as computer network topologies, multiplayer games
                 and medical imaging. A natural and interesting problem
                 in evolving graph analysis is the discovery of compact
                 subgraphs that change in a similar manner. Such
                 subgraphs are known as regions of correlated change and
                 they can both summarise change patterns in graphs and
                 help identify the underlying events causing these
                 changes. However, previous techniques for discovering
                 regions of correlated change suffer from limited
                 scalability, making them unsuitable for analysing the
                 evolution of very large graphs. In this paper, we
                 introduce a new algorithm called ciForager, that
                 addresses this scalability challenge and offers
                 considerable improvements. The efficiency of ciForager
                 is based on the use of new incremental techniques for
                 detecting change, as well as the use of Voronoi
                 representations for efficiently determining distance.
                 We experimentally show that ciForager can achieve
                 speedups of up to 1000 times over previous approaches.
                 As a result, it becomes feasible for the first time to
                 discover regions of correlated change in extremely
                 large graphs, such as the entire BGP routing topology
                 of the Internet.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2012:CDS,
  author =       "Dingding Wang and Shenghuo Zhu and Tao Li and Yihong
                 Gong",
  title =        "Comparative document summarization via discriminative
                 sentence selection",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362386",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a collection of document groups, a natural
                 question is to identify the differences among them.
                 Although traditional document summarization techniques
                 can summarize the content of the document groups one by
                 one, there exists a great necessity to generate a
                 summary of the differences among the document groups.
                 In this article, we study a novel problem, that of
                 summarizing the differences between document groups. A
                 discriminative sentence selection method is proposed to
                 extract the most discriminative sentences which
                 represent the specific characteristics of each document
                 group. Experiments and case studies on real-world data
                 sets demonstrate the effectiveness of our proposed
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  remark =       "Apparently republished in TKDD 7(1), article 2 (entry
                 Wang:2013:CDS): same authors and title, near-identical
                 abstract. TODO: confirm which version to cite.",
}
@Article{deMelo:2012:FNO,
  author =       "Pedro O. S. {Vaz de Melo} and Virgilio A. F. Almeida
                 and Antonio A. F. Loureiro and Christos Faloutsos",
  title =        "Forecasting in the {NBA} and other team sports:
                 Network effects in action",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362387",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The multi-million sports-betting market is based on
                 the fact that the task of predicting the outcome of a
                 sports event is very hard. Even with the aid of an
                 uncountable number of descriptive statistics and
                 background information, only a few can correctly guess
                 the outcome of a game or a league. In this work, our
                 approach is to move away from the traditional way of
                 predicting sports events, and instead to model sports
                 leagues as networks of players and teams where the only
                 information available is the work relationships among
                 them. We propose two network-based models to predict
                 the behavior of teams in sports leagues. These models
                 are parameter-free, that is, they do not have a single
                 parameter, and moreover are sport-agnostic: they can be
                 applied directly to any team sports league. First, we
                 view a sports league as a network in evolution, and we
                 infer the implicit feedback behind network changes and
                 properties over the years. Then, we use this knowledge
                 to construct the network-based prediction models, which
                 can, with a significantly high probability, indicate
                 how well a team will perform over a season. We compare
                 our proposed models with other prediction models in two
                 of the most popular sports leagues: the National
                 Basketball Association (NBA) and the Major League
                 Baseball (MLB). Our model shows consistently good
                 results in comparison with the other models and,
                 relying upon the network properties of the teams, we
                 achieved a $ \approx 14 \% $ rank prediction accuracy
                 improvement over our best competitor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ghosh:2012:SIB,
  author =       "Joydeep Ghosh and Padhraic Smyth and Andrew Tomkins
                 and Rich Caruana",
  title =        "Special issue on best of {SIGKDD 2011}",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382578",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Kaufman:2012:LDM,
  author =       "Shachar Kaufman and Saharon Rosset and Claudia Perlich
                 and Ori Stitelman",
  title =        "Leakage in data mining: Formulation, detection, and
                 avoidance",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382579",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Deemed ``one of the top ten data mining mistakes'',
                 leakage is the introduction of information about the
                 data mining target that should not be legitimately
                 available to mine from. In addition to our own industry
                 experience with real-life projects, controversies
                 around several major public data mining competitions
                 held recently such as the INFORMS 2010 Data Mining
                 Challenge and the IJCNN 2011 Social Network Challenge
                 are evidence that this issue is as relevant today as it
                 has ever been. While acknowledging the importance and
                 prevalence of leakage in both synthetic competitions
                 and real-life data mining projects, existing literature
                 has largely left this idea unexplored. What little has
                 been said turns out not to be broad enough to cover
                 more complex cases of leakage, such as those where the
                 classical independently and identically distributed
                 (i.i.d.)\ assumption is violated, that have been
                 recently documented. In our new approach, these cases
                 and others are explained by explicitly defining
                 modeling goals and analyzing the broader framework of
                 the data mining problem. The resulting definition
                 enables us to derive general methodology for dealing
                 with the issue. We show that it is possible to avoid
                 leakage with a simple specific approach to data
                 management followed by what we call a learn-predict
                 separation, and present several ways of detecting
                 leakage when the modeler has no control over how the
                 data have been collected. We also offer an alternative
                 point of view on leakage that is based on causal graph
                 modeling concepts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Mampaey:2012:SDS,
  author =       "Michael Mampaey and Jilles Vreeken and Nikolaj Tatti",
  title =        "Summarizing data succinctly with the most informative
                 itemsets",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382580",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Knowledge discovery from data is an inherently
                 iterative process. That is, what we know about the data
                 greatly determines our expectations, and therefore,
                 what results we would find interesting and/or
                 surprising. Given new knowledge about the data, our
                 expectations will change. Hence, in order to avoid
                 redundant results, knowledge discovery algorithms
                 ideally should follow such an iterative updating
                 procedure. With this in mind, we introduce a
                 well-founded approach for succinctly summarizing data
                 with the most informative itemsets; using a
                 probabilistic maximum entropy model, we iteratively
                 find the itemset that provides us the most novel
                 information---that is, for which the frequency in the
                 data surprises us the most---and in turn we update our
                 model accordingly. As we use the maximum entropy
                 principle to obtain unbiased probabilistic models, and
                 only include those itemsets that are most informative
                 with regard to the current model, the summaries we
                 construct are guaranteed to be both descriptive and
                 nonredundant. The algorithm that we present, called
                 MTV, can either discover the top-$k$ most informative
                 itemsets, or we can employ either the Bayesian
                 Information Criterion (BIC) or the Minimum Description
                 Length (MDL) principle to automatically identify the
                 set of itemsets that together summarize the data well.
                 In other words, our method will ``tell you what you
                 need to know'' about the data. Importantly, it is a
                 one-phase algorithm: rather than picking itemsets from
                 a user-provided candidate set, itemsets and their
                 supports are mined on-the-fly. To further its
                 applicability, we provide an efficient method to
                 compute the maximum entropy distribution using Quick
                 Inclusion-Exclusion. Experiments on our method, using
                 synthetic, benchmark, and real data, show that the
                 discovered summaries are succinct, and correctly
                 identify the key patterns in the data. The models they
                 form attain high likelihoods, and inspection shows that
                 they summarize the data well with increasingly
                 specific, yet nonredundant itemsets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chu:2012:TLM,
  author =       "Shumo Chu and James Cheng",
  title =        "Triangle listing in massive networks",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382581",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Triangle listing is one of the fundamental algorithmic
                 problems whose solution has numerous applications
                 especially in the analysis of complex networks, such as
                 the computation of clustering coefficients,
                 transitivity, triangular connectivity, trusses, etc.
                 Existing algorithms for triangle listing are mainly
                 in-memory algorithms, whose performance cannot scale
                 with the massive volume of today's fast growing
                 networks. When the input graph cannot fit in main
                 memory, triangle listing requires random disk accesses
                 that can incur prohibitively huge I/O cost. Some
                 streaming, semistreaming, and sampling algorithms have
                 been proposed but these are approximation algorithms.
                 We propose an I/O-efficient algorithm for triangle
                 listing. Our algorithm is exact and avoids random disk
                 access. Our results show that our algorithm is scalable
                 and outperforms the state-of-the-art in-memory and
                 local triangle estimation algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chattopadhyay:2012:MDA,
  author =       "Rita Chattopadhyay and Qian Sun and Wei Fan and Ian
                 Davidson and Sethuraman Panchanathan and Jieping Ye",
  title =        "Multisource domain adaptation and its application to
                 early detection of fatigue",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382582",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We consider the characterization of muscle fatigue
                 through a noninvasive sensing mechanism such as Surface
                 ElectroMyoGraphy (SEMG). While changes in the
                 properties of SEMG signals with respect to muscle
                 fatigue have been reported in the literature, the large
                 variation in these signals across different individuals
                 makes the task of modeling and classification of SEMG
                 signals challenging. Indeed, the variation in SEMG
                 parameters from subject to subject creates differences
                 in the data distribution. In this article, we propose
                 two transfer learning frameworks based on the
                 multisource domain adaptation methodology for detecting
                 different stages of fatigue using SEMG signals, that
                 addresses the distribution differences. In the proposed
                 frameworks, the SEMG data of a subject represent a
                 domain; data from multiple subjects in the training set
                 form the multiple source domains and the test subject
                 data form the target domain. SEMG signals are
                 predominantly different in conditional probability
                 distribution across subjects. The key feature of the
                 first framework is a novel weighting scheme that
                 addresses the conditional probability distribution
                 differences across multiple domains (subjects) and the
                 key feature of the second framework is a two-stage
                 domain adaptation methodology which combines weighted
                 data from multiple sources based on marginal
                 probability differences (first stage) as well as
                 conditional probability differences (second stage),
                 with the target domain data. The weights for minimizing
                 the marginal probability differences are estimated
                 independently, while the weights for minimizing
                 conditional probability differences are computed
                 simultaneously by exploiting the potential interaction
                 among multiple sources. We also provide a theoretical
                 analysis on the generalization performance of the
                 proposed multisource domain adaptation formulation
                 using the weighted Rademacher complexity measure. We
                 have validated the proposed frameworks on Surface
                 ElectroMyoGram signals collected from 8 people during a
                 fatigue-causing repetitive gripping activity.
                 Comprehensive experiments on the SEMG dataset
                 demonstrate that the proposed method improves the
                 classification accuracy by 20\% to 30\% over the cases
                 without any domain adaptation method and by 13\% to
                 30\% over existing state-of-the-art domain adaptation
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wilkinson:2012:SIS,
  author =       "Leland Wilkinson and Anushka Anand and Tuan Nhon
                 Dang",
  title =        "Substantial improvements in the set-covering
                 projection classifier {CHIRP} (composite hypercubes on
                 iterated random projections)",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "19:1--19:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382583",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In Wilkinson et al.\ [2011] we introduced a new
                 set-covering random projection classifier that achieved
                 average error lower than that of other classifiers in
                 the Weka platform. This classifier was based on an $
                 L^\infty $ norm distance function and exploited an
                 iterative sequence of three stages (projecting,
                 binning, and covering) to deal with the curse of
                 dimensionality, computational complexity, and nonlinear
                 separability. We now present substantial changes that
                 improve robustness and reduce training and testing time
                 by almost an order of magnitude without jeopardizing
                 CHIRP's outstanding error performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Angiulli:2013:NNB,
  author =       "Fabrizio Angiulli and Fabio Fassetti",
  title =        "Nearest Neighbor-Based Classification of Uncertain
                 Data",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435210",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This work deals with the problem of classifying
                 uncertain data. With this aim we introduce the
                 Uncertain Nearest Neighbor (UNN) rule, which represents
                 the generalization of the deterministic nearest
                 neighbor rule to the case in which uncertain objects
                 are available. The UNN rule relies on the concept of
                 nearest neighbor class, rather than on that of nearest
                 neighbor object. The nearest neighbor class of a test
                 object is the class that maximizes the probability of
                 providing its nearest neighbor. The evidence is that
                 the former concept is much more powerful than the
                 latter in the presence of uncertainty, in that it
                 correctly models the right semantics of the nearest
                 neighbor decision rule when applied to the uncertain
                 scenario. An effective and efficient algorithm to
                 perform uncertain nearest neighbor classification of a
                 generic (un)certain test object is designed, based on
                 properties that greatly reduce the temporal cost
                 associated with nearest neighbor class probability
                 computation. Experimental results are presented,
                 showing that the UNN rule is effective and efficient in
                 classifying uncertain data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2013:CDS,
  author =       "Dingding Wang and Shenghuo Zhu and Tao Li and Yihong
                 Gong",
  title =        "Comparative Document Summarization via Discriminative
                 Sentence Selection",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435211",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a collection of document groups, a natural
                 question is to identify the differences among these
                 groups. Although traditional document summarization
                 techniques can summarize the content of the document
                 groups one by one, there exists a great necessity to
                 generate a summary of the differences among the
                 document groups. In this article, we study a novel
                 problem of summarizing the differences between document
                 groups. A discriminative sentence selection method is
                 proposed to extract the most discriminative sentences
                 that represent the specific characteristics of each
                 document group. Experiments and case studies on
                 real-world data sets demonstrate the effectiveness of
                 our proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  remark =       "Apparent republication of TKDD 6(3), article 12 (entry
                 Wang:2012:CDS): same authors and title, near-identical
                 abstract. TODO: confirm which version to cite.",
}
@Article{Bayati:2013:MPA,
  author =       "Mohsen Bayati and David F. Gleich and Amin Saberi and
                 Ying Wang",
  title =        "Message-Passing Algorithms for Sparse Network
                 Alignment",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435212",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Network alignment generalizes and unifies several
                 approaches for forming a matching or alignment between
                 the vertices of two graphs. We study a mathematical
                 programming framework for network alignment problem and
                 a sparse variation of it where only a small number of
                 matches between the vertices of the two graphs are
                 possible. We propose a new message passing algorithm
                 that allows us to compute, very efficiently,
                 approximate solutions to the sparse network alignment
                 problems with graph sizes as large as hundreds of
                 thousands of vertices. We also provide extensive
                 simulations comparing our algorithms with two of the
                 best solvers for network alignment problems on two
                 synthetic matching problems, two bioinformatics
                 problems, and three large ontology alignment problems
                 including a multilingual problem with a known labeled
                 alignment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2013:CWM,
  author =       "Bin Li and Steven C. H. Hoi and Peilin Zhao and
                 Vivekanand Gopalkrishnan",
  title =        "Confidence Weighted Mean Reversion Strategy for Online
                 Portfolio Selection",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435213",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Online portfolio selection has been attracting
                 increasing attention from the data mining and machine
                 learning communities. All existing online portfolio
                 selection strategies focus on the first order
                 information of a portfolio vector, though the second
                 order information may also be beneficial to a strategy.
                 Moreover, empirical evidence shows that relative stock
                 prices may follow the mean reversion property, which
                 has not been fully exploited by existing strategies.
                 This article proposes a novel online portfolio
                 selection strategy named Confidence Weighted Mean
                 Reversion (CWMR). Inspired by the mean reversion
                 principle in finance and confidence weighted online
                 learning technique in machine learning, CWMR models the
                 portfolio vector as a Gaussian distribution, and
                 sequentially updates the distribution by following the
                 mean reversion trading principle. CWMR's closed-form
                 updates clearly reflect the mean reversion trading
                 idea. We also present several variants of CWMR
                 algorithms, including a CWMR mixture algorithm that is
                 theoretical universal. Empirically, CWMR strategy is
                 able to effectively exploit the power of mean reversion
                 for online portfolio selection. Extensive experiments
                 on various real markets show that the proposed strategy
                 is superior to the state-of-the-art techniques. The
                 experimental testbed including source codes and data
                 sets is available online.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Lou:2013:LPR,
  author =       "Tiancheng Lou and Jie Tang and John Hopcroft and
                 Zhanpeng Fang and Xiaowen Ding",
  title =        "Learning to predict reciprocity and triadic closure in
                 social networks",
  journal =      j-TKDD,
  volume =       "7",
  number =       "2",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499907.2499908",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:06 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We study how links are formed in social networks. In
                 particular, we focus on investigating how a reciprocal
                 (two-way) link, the basic relationship in social
                 networks, is developed from a parasocial (one-way)
                 relationship and how the relationships further develop
                 into triadic closure, one of the fundamental processes
                 of link formation. We first investigate how geographic
                 distance and interactions between users influence the
                 formation of link structure among users. Then we study
                 how social theories including homophily, social
                 balance, and social status are satisfied over networks
                 with parasocial and reciprocal relationships. The study
                 unveils several interesting phenomena. For example,
                 ``friend's friend is a friend'' indeed exists in the
                 reciprocal relationship network, but does not hold in
                 the parasocial relationship network. We propose a
                 learning framework to formulate the problems of
                 predicting reciprocity and triadic closure into a
                 graphical model. We demonstrate that it is possible to
                 accurately infer 90\% of reciprocal relationships in a
                 Twitter network. The proposed model also achieves
                 better performance (+20--30\% in terms of F1-measure)
                 than several alternative methods for predicting the
                 triadic closure formation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2013:EOL,
author = "Haiqin Yang and Michael R. Lyu and Irwin King",
title = "Efficient online learning for multitask feature
selection",
journal = j-TKDD,
volume = "7",
number = "2",
pages = "6:1--6:??",
month = jul,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2499907.2499909",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:06 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Learning explanatory features across multiple related
tasks, or MultiTask Feature Selection (MTFS), is an
important problem in the applications of data mining,
machine learning, and bioinformatics. Previous MTFS
methods fulfill this task by batch-mode training. This
makes them inefficient when data come sequentially or
when the number of training data is so large that they
cannot be loaded into the memory simultaneously. In
order to tackle these problems, we propose a novel
online learning framework to solve the MTFS problem. A
main advantage of the online algorithm is its
efficiency in both time complexity and memory cost. The
weights of the MTFS models at each iteration can be
updated by closed-form solutions based on the average
of previous subgradients. This yields the worst-case
bounds of the time complexity and memory cost at each
iteration, both in the order of $ O(d \times Q) $,
where $d$ is the number of feature dimensions and $Q$
is the number of tasks. Moreover, we provide
theoretical analysis for the average regret of the
online learning algorithms, which also guarantees the
convergence rate of the algorithms. Finally, we conduct
detailed experiments to show the characteristics and
merits of the online learning algorithms in solving
several MTFS problems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2013:MRL,
author = "Yu Zhang and Dit-Yan Yeung",
title = "Multilabel relationship learning",
journal = j-TKDD,
volume = "7",
number = "2",
pages = "7:1--7:??",
month = jul,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2499907.2499910",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:06 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Multilabel learning problems are commonly found in
many applications. A characteristic shared by many
multilabel learning problems is that some labels have
significant correlations between them. In this article,
we propose a novel multilabel learning method, called
MultiLabel Relationship Learning (MLRL), which extends
the conventional support vector machine by explicitly
learning and utilizing the relationships between
labels. Specifically, we model the label relationships
using a label covariance matrix and use it to define a
new regularization term for the optimization problem.
MLRL learns the model parameters and the label
covariance matrix simultaneously based on a unified
convex formulation. To solve the convex optimization
problem, we use an alternating method in which each
subproblem can be solved efficiently. The relationship
between MLRL and two widely used maximum margin methods
for multilabel learning is investigated. Moreover, we
also propose a semisupervised extension of MLRL, called
SSMLRL, to demonstrate how to make use of unlabeled
data to help learn the label covariance matrix. Through
experiments conducted on some multilabel applications,
we find that MLRL not only gives higher classification
accuracy but also has better interpretability as
revealed by the label covariance matrix.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Peng:2013:EFF,
author = "Jing Peng and Guna Seetharaman and Wei Fan and Aparna
Varde",
title = "Exploiting {Fisher} and {Fukunaga--Koontz} transforms
in {Chernoff} dimensionality reduction",
journal = j-TKDD,
volume = "7",
number = "2",
pages = "8:1--8:??",
month = jul,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2499907.2499911",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:06 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Knowledge discovery from big data demands effective
representation of data. However, big data are often
characterized by high dimensionality, which makes
knowledge discovery more difficult. Many techniques for
dimensionality reduction have been proposed, including
well-known Fisher's Linear Discriminant Analysis (LDA).
However, the Fisher criterion is incapable of dealing
with heteroscedasticity in the data. A technique based
on the Chernoff criterion for linear dimensionality
reduction has been proposed that is capable of
exploiting heteroscedastic information in the data.
While the Chernoff criterion has been shown to
outperform the Fisher's, a clear understanding of its
exact behavior is lacking. In this article, we show
precisely what can be expected from the Chernoff
criterion. In particular, we show that the Chernoff
criterion exploits the Fisher and Fukunaga--Koontz
transforms in computing its linear discriminants.
Furthermore, we show that a recently proposed
decomposition of the data space into four subspaces is
incomplete. We provide arguments on how to best enrich
the decomposition of the data space in order to account
for heteroscedasticity in the data. Finally, we provide
experimental results validating our theoretical
analysis.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Agarwal:2013:ISI,
author = "Deepak Agarwal and Rich Caruana and Jian Pei and Ke
Wang",
title = "Introduction to the {Special Issue ACM SIGKDD 2012}",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2513092.2513093",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Rakthanmanon:2013:ABD,
author = "Thanawin Rakthanmanon and Bilson Campana and Abdullah
Mueen and Gustavo Batista and Brandon Westover and
Qiang Zhu and Jesin Zakaria and Eamonn Keogh",
title = "Addressing Big Data Time Series: Mining Trillions of
Time Series Subsequences Under Dynamic Time Warping",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "10:1--10:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2500489",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Most time series data mining algorithms use similarity
search as a core subroutine, and thus the time taken
for similarity search is the bottleneck for virtually
all time series data mining algorithms, including
classification, clustering, motif discovery, anomaly
detection, and so on. The difficulty of scaling a
search to large datasets explains to a great extent why
most academic work on time series data mining has
plateaued at considering a few millions of time series
objects, while much of industry and science sits on
billions of time series objects waiting to be explored.
In this work we show that by using a combination of
four novel ideas we can search and mine massive time
series for the first time. We demonstrate the following
unintuitive fact: in large datasets we can exactly
search under Dynamic Time Warping (DTW) much more
quickly than the current state-of-the-art Euclidean
distance search algorithms. We demonstrate our work on
the largest set of time series experiments ever
attempted. In particular, the largest dataset we
consider is larger than the combined size of all of the
time series datasets considered in all data mining
papers ever published. We explain how our ideas allow
us to solve higher-level time series data mining
problems such as motif discovery and clustering at
scales that would otherwise be untenable. Moreover, we
show how our ideas allow us to efficiently support the
uniform scaling distance measure, a measure whose
utility seems to be underappreciated, but which we
demonstrate here. In addition to mining massive
datasets with up to one trillion datapoints, we will
show that our ideas also have implications for
real-time monitoring of data streams, allowing us to
handle much faster arrival rates and/or use cheaper and
lower powered devices than are currently possible.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2013:PIM,
author = "Yizhou Sun and Brandon Norick and Jiawei Han and
Xifeng Yan and Philip S. Yu and Xiao Yu",
title = "{PathSelClus}: Integrating Meta-Path Selection with
User-Guided Object Clustering in Heterogeneous
Information Networks",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "11:1--11:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2500492",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Real-world, multiple-typed objects are often
interconnected, forming heterogeneous information
networks. A major challenge for link-based clustering
in such networks is their potential to generate many
different results, carrying rather diverse semantic
meanings. In order to generate desired clustering, we
propose to use meta-path, a path that connects object
types via a sequence of relations, to control
clustering with distinct semantics. Nevertheless, it is
easier for a user to provide a few examples (seeds)
than a weighted combination of sophisticated meta-paths
to specify her clustering preference. Thus, we propose
to integrate meta-path selection with user-guided
clustering to cluster objects in networks, where a user
first provides a small set of object seeds for each
cluster as guidance. Then the system learns the weight
for each meta-path that is consistent with the
clustering result implied by the guidance, and
generates clusters under the learned weights of
meta-paths. A probabilistic approach is proposed to
solve the problem, and an effective and efficient
iterative algorithm, PathSelClus, is proposed to learn
the model, where the clustering quality and the
meta-path weights mutually enhance each other. Our
experiments with several clustering tasks in two real
networks and one synthetic network demonstrate the
power of the algorithm in comparison with the
baselines.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bellare:2013:ASE,
author = "Kedar Bellare and Suresh Iyengar and Aditya
Parameswaran and Vibhor Rastogi",
title = "Active Sampling for Entity Matching with Guarantees",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "12:1--12:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2500490",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In entity matching, a fundamental issue while training
a classifier to label pairs of entities as either
duplicates or nonduplicates is the one of selecting
informative training examples. Although active learning
presents an attractive solution to this problem,
previous approaches minimize the misclassification rate
(0--1 loss) of the classifier, which is an unsuitable
metric for entity matching due to class imbalance
(i.e., many more nonduplicate pairs than duplicate
pairs). To address this, a recent paper [Arasu et al.
2010] proposes to maximize recall of the classifier
under the constraint that its precision should be
greater than a specified threshold. However, the
proposed technique requires the labels of all $n$ input
pairs in the worst case. Our main result is an active
learning algorithm that approximately maximizes recall
of the classifier while respecting a precision
constraint with provably sublinear label complexity
(under certain distributional assumptions). Our
algorithm uses as a black box any active learning
module that minimizes 0--1 loss. We show that label
complexity of our algorithm is at most $ \log n $ times the
label complexity of the black box, and also bound the
difference in the recall of classifier learnt by our
algorithm and the recall of the optimal classifier
satisfying the precision constraint. We provide an
empirical evaluation of our algorithm on several
real-world matching data sets that demonstrates the
effectiveness of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chattopadhyay:2013:BMA,
author = "Rita Chattopadhyay and Zheng Wang and Wei Fan and Ian
Davidson and Sethuraman Panchanathan and Jieping Ye",
title = "Batch Mode Active Sampling Based on Marginal
Probability Distribution Matching",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "13:1--13:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2513092.2513094",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Active Learning is a machine learning and data mining
technique that selects the most informative samples for
labeling and uses them as training data; it is
especially useful when there are large amount of
unlabeled data and labeling them is expensive.
Recently, batch-mode active learning, where a set of
samples are selected concurrently for labeling, based
on their collective merit, has attracted a lot of
attention. The objective of batch-mode active learning
is to select a set of informative samples so that a
classifier learned on these samples has good
generalization performance on the unlabeled data. Most
of the existing batch-mode active learning
methodologies try to achieve this by selecting samples
based on certain criteria. In this article we propose a
novel criterion which achieves good generalization
performance of a classifier by specifically selecting a
set of query samples that minimize the difference in
distribution between the labeled and the unlabeled
data, after annotation. We explicitly measure this
difference based on all candidate subsets of the
unlabeled data and select the best subset. The proposed
objective is an NP-hard integer programming
optimization problem. We provide two optimization
techniques to solve this problem. In the first one, the
problem is transformed into a convex quadratic
programming problem and in the second method the
problem is transformed into a linear programming
problem. Our empirical studies using publicly available
UCI datasets and two biomedical image databases
demonstrate the effectiveness of the proposed approach
in comparison with the state-of-the-art batch-mode
active learning methods. We also present two extensions
of the proposed approach, which incorporate uncertainty
of the predicted labels of the unlabeled data and
transfer learning in the proposed formulation. In
addition, we present a joint optimization framework for
performing both transfer and active learning
simultaneously unlike the existing approaches of
learning in two separate stages, that is, typically,
transfer learning followed by active learning. We
specifically minimize a common objective of reducing
distribution difference between the domain adapted
source, the queried and labeled samples and the rest of
the unlabeled target domain data. Our empirical studies
on two biomedical image databases and on a publicly
available 20 Newsgroups dataset show that incorporation
of uncertainty information and transfer learning
further improves the performance of the proposed active
learning based classifier. Our empirical studies also
show that the proposed transfer-active method based on
the joint optimization framework performs significantly
better than a framework which implements transfer and
active learning in two separate stages.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Briggs:2013:IAM,
author = "Forrest Briggs and Xiaoli Z. Fern and Raviv Raich and
Qi Lou",
title = "Instance Annotation for Multi-Instance Multi-Label
Learning",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "14:1--14:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2500491",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Multi-instance multi-label learning (MIML) is a
framework for supervised classification where the
objects to be classified are bags of instances
associated with multiple labels. For example, an image
can be represented as a bag of segments and associated
with a list of objects it contains. Prior work on MIML
has focused on predicting label sets for previously
unseen bags. We instead consider the problem of
predicting instance labels while learning from data
labeled only at the bag level. We propose a regularized
rank-loss objective designed for instance annotation,
which can be instantiated with different aggregation
models connecting instance-level labels with bag-level
label sets. The aggregation models that we consider can
be factored as a linear function of a ``support
instance'' for each class, which is a single feature
vector representing a whole bag. Hence we name our
proposed methods rank-loss Support Instance Machines
(SIM). We propose two optimization methods for the
rank-loss objective, which is nonconvex. One is a
heuristic method that alternates between updating
support instances, and solving a convex problem in
which the support instances are treated as constant.
The other is to apply the constrained concave-convex
procedure (CCCP), which can also be interpreted as
iteratively updating support instances and solving a
convex problem. To solve the convex problem, we employ
the Pegasos framework of primal subgradient descent,
and prove that it finds an $ \epsilon $-suboptimal
solution in runtime that is linear in the number of
bags, instances, and $ 1 / \epsilon $. Additionally, we
suggest a method of extending the linear learning
algorithm to nonlinear classification, without
increasing the runtime asymptotically. Experiments on
artificial and real-world datasets including images and
audio show that the proposed methods achieve higher
accuracy than other loss functions used in prior work,
e.g., Hamming loss, and recent work in ambiguous label
classification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ji:2013:PFR,
author = "Ming Ji and Binbin Lin and Xiaofei He and Deng Cai and
Jiawei Han",
title = "Parallel Field Ranking",
journal = j-TKDD,
volume = "7",
number = "3",
pages = "15:1--15:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2513092.2513096",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:07 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Recently, ranking data with respect to the intrinsic
geometric structure (manifold ranking) has received
considerable attentions, with encouraging performance
in many applications in pattern recognition,
information retrieval and recommendation systems. Most
of the existing manifold ranking methods focus on
learning a ranking function that varies smoothly along
the data manifold. However, beyond smoothness, a
desirable ranking function should vary monotonically
along the geodesics of the data manifold, such that the
ranking order along the geodesics is preserved. In this
article, we aim to learn a ranking function that varies
linearly and therefore monotonically along the
geodesics of the data manifold. Recent theoretical work
shows that the gradient field of a linear function on
the manifold has to be a parallel vector field.
Therefore, we propose a novel ranking algorithm on the
data manifolds, called Parallel Field Ranking.
Specifically, we try to learn a ranking function and a
vector field simultaneously. We require the vector
field to be close to the gradient field of the ranking
function, and the vector field to be as parallel as
possible. Moreover, we require the value of the ranking
function at the query point to be the highest, and then
decrease linearly along the manifold. Experimental
results on both synthetic data and real data
demonstrate the effectiveness of our proposed
algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Adali:2013:IPR,
author = "Sibel Adali and Malik Magdon-Ismail and Xiaohui Lu",
title = "{iHypR}: Prominence ranking in networks of
collaborations with hyperedges",
journal = j-TKDD,
volume = "7",
number = "4",
pages = "16:1--16:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2541268.2541269",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:09 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We present a new algorithm called iHypR for computing
prominence of actors in social networks of
collaborations. Our algorithm builds on the assumption
that prominent actors collaborate on prominent objects,
and prominent objects are naturally grouped into
prominent clusters or groups (hyperedges in a graph).
iHypR makes use of the relationships between actors,
objects, and hyperedges to compute a global prominence
score for the actors in the network. We do not assume
the hyperedges are given in advance. Hyperedges
computed by our method can perform as well or even
better than ``true'' hyperedges. Our algorithm is
customized for networks of collaborations, but it is
generally applicable without further tuning. We show,
through extensive experimentation with three real-life
data sets and multiple external measures of prominence,
that our algorithm outperforms existing well-known
algorithms. Our work is the first to offer such an
extensive evaluation. We show that unlike most existing
algorithms, the performance is robust across multiple
measures of performance. Further, we give a detailed
study of the sensitivity of our algorithm to different
data sets and the design choices within the algorithm
that a user may wish to change. Our article illustrates
the various trade-offs that must be considered in
computing prominence in collaborative social
networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2013:STP,
author = "Jin Huang and Feiping Nie and Heng Huang and Yi-Cheng
Tu and Yu Lei",
title = "Social trust prediction using heterogeneous networks",
journal = j-TKDD,
volume = "7",
number = "4",
pages = "17:1--17:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2541268.2541270",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:09 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Along with increasing popularity of social websites,
online users rely more on the trustworthiness
information to make decisions, extract and filter
information, and tag and build connections with other
users. However, such social network data often suffer
from severe data sparsity and are not able to provide
users with enough information. Therefore, trust
prediction has emerged as an important topic in social
network research. Traditional approaches are primarily
based on exploring trust graph topology itself.
However, research in sociology and our life experience
suggest that people who are in the same social circle
often exhibit similar behaviors and tastes. To take
advantage of the ancillary information for trust
prediction, the challenge then becomes what to transfer
and how to transfer. In this article, we address this
problem by aggregating heterogeneous social networks
and propose a novel joint social networks mining (JSNM)
method. Our new joint learning model explores the
user-group-level similarity between correlated graphs
and simultaneously learns the individual graph
structure; therefore, the shared structures and
patterns from multiple social networks can be utilized
to enhance the prediction tasks. As a result, we not
only improve the trust prediction in the target graph
but also facilitate other information retrieval tasks
in the auxiliary graphs. To optimize the proposed
objective function, we use the alternative technique to
break down the objective function into several
manageable subproblems. We further introduce the
auxiliary function to solve the optimization problems
with rigorously proved convergence. The extensive
experiments have been conducted on both synthetic and
real-world data. All empirical results demonstrate the
effectiveness of our method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "17",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Guzzo:2013:SIF,
author = "Antonella Guzzo and Luigi Moccia and Domenico
Sacc{\`a} and Edoardo Serra",
title = "Solving inverse frequent itemset mining with
infrequency constraints via large-scale linear
programs",
journal = j-TKDD,
volume = "7",
number = "4",
pages = "18:1--18:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2541268.2541271",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:09 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Inverse frequent set mining (IFM) is the problem of
computing a transaction database $D$ satisfying given
support constraints for some itemsets, which are
typically the frequent ones. This article proposes a
new formulation of IFM, called IFM$_I$ (IFM with
infrequency constraints), where the itemsets that are
not listed as frequent are constrained to be
infrequent; that is, they must have a support less than
or equal to a specified unique threshold. An instance
of IFM$_I$ can be seen as an instance of the original
IFM by making explicit the infrequency constraints for
the minimal infrequent itemsets, corresponding to the
so-called negative generator border defined in the
literature. The complexity increase from PSPACE
(complexity of IFM) to NEXP (complexity of IFM$_I$) is
caused by the cardinality of the negative generator
border, which can be exponential in the original input
size. Therefore, the article introduces a specific
problem parameter $ \kappa $ that computes an upper
bound to this cardinality using a hypergraph
interpretation for which minimal infrequent itemsets
correspond to minimal transversals. By fixing a
constant $k$, the article formulates a $k$-bounded
definition of the problem, called $k$-IFM$_I$, that
collects all instances for which the value of the
parameter $ \kappa $ is less than or equal to $k$---its
complexity is in PSPACE as for IFM. The bounded problem
is encoded as an integer linear program with a large
number of variables (actually exponential w.r.t. the
number of constraints), which is thereafter
approximated by relaxing integer constraints---the
decision problem of solving the linear program is
proven to be in NP. In order to solve the linear
program, a column generation technique is used that is
a variation of the simplex method designed to solve
large-scale linear programs, in particular with a huge
number of variables. The method at each step requires
the solution of an auxiliary integer linear program,
which is proven to be NP hard in this case and for
which a greedy heuristic is presented. The resulting
overall column generation solution algorithm enjoys
very good scaling as evidenced by the intensive
experimentation, thereby paving the way for its
application in real-life scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "18",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Balcazar:2013:FCP,
author = "Jos{\'e} L. Balc{\'a}zar",
title = "Formal and computational properties of the confidence
boost of association rules",
journal = j-TKDD,
volume = "7",
number = "4",
pages = "19:1--19:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2541268.2541272",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Mar 13 09:16:09 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Some existing notions of redundancy among association
rules allow for a logical-style characterization and
lead to irredundant bases of absolutely minimum size.
We push the intuition of redundancy further to find an
intuitive notion of novelty of an association rule,
with respect to other rules. Namely, an irredundant
rule is so because its confidence is higher than what
the rest of the rules would suggest; then, one can ask:
how much higher? We propose to measure such a sort of
novelty through the confidence boost of a rule. Acting
as a complement to confidence and support, the
confidence boost helps to obtain small and crisp sets
of mined association rules and solves the well-known
problem that, in certain cases, rules of negative
correlation may pass the confidence bound. We analyze
the properties of two versions of the notion of
confidence boost, one of them a natural generalization
of the other. We develop algorithms to filter rules
according to their confidence boost, compare the
concept to some similar notions in the literature, and
describe the results of some experimentation employing
the new notions on standard benchmark datasets. We
describe an open source association mining tool that
embodies one of our variants of confidence boost in
such a way that the data mining process does not
require the user to select any value for any
parameter.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "19",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ang:2013:CPN,
  author =       "Hock Hee Ang and Vivekanand Gopalkrishnan and Steven
                  C. H. Hoi and Wee Keong Ng",
  title =        "Classification in {P2P} networks with cascade support
                  vector machines",
  journal =      j-TKDD,
  volume =       "7",
  number =       "4",
  pages =        "20:1--20:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2541268.2541273",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:09 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Classification in Peer-to-Peer (P2P) networks is
                  important to many real applications, such as
                  distributed intrusion detection, distributed
                  recommendation systems, and distributed antispam
                  detection. However, it is very challenging to perform
                  classification in P2P networks due to many practical
                  issues, such as scalability, peer dynamism, and
                  asynchronism. This article investigates the practical
                  techniques of constructing Support Vector Machine (SVM)
                  classifiers in the P2P networks. In particular, we
                  demonstrate how to efficiently cascade SVM in a P2P
                  network with the use of reduced SVM. In addition, we
                  propose to fuse the concept of cascade SVM with
                  bootstrap aggregation to effectively balance the
                  trade-off between classification accuracy, model
                  construction, and prediction cost. We provide
                  theoretical insights for the proposed solutions and
                  conduct an extensive set of empirical studies on a
                  number of large-scale datasets. Encouraging results
                  validate the efficacy of the proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2014:ISI,
  author =       "Wei Chen and Jie Tang",
  title =        "Introduction to special issue on computational aspects
                  of social and information networks: Theory,
                  methodologies, and applications {(TKDD-CASIN)}",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556608",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2014:USN,
  author =       "Zhi Yang and Christo Wilson and Xiao Wang and Tingting
                  Gao and Ben Y. Zhao and Yafei Dai",
  title =        "Uncovering social network {Sybils} in the wild",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556609",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Sybil accounts are fake identities created to unfairly
                  increase the power or resources of a single malicious
                  user. Researchers have long known about the existence
                  of Sybil accounts in online communities such as
                  file-sharing systems, but they have not been able to
                  perform large-scale measurements to detect them or
                  measure their activities. In this article, we describe
                  our efforts to detect, characterize, and understand
                  Sybil account activity in the Renren Online Social
                  Network (OSN). We use ground truth provided by Renren
                  Inc.\ to build measurement-based Sybil detectors and
                  deploy them on Renren to detect more than 100,000 Sybil
                  accounts. Using our full dataset of 650,000 Sybils, we
                  examine several aspects of Sybil behavior. First, we
                  study their link creation behavior and find that
                  contrary to prior conjecture, Sybils in OSNs do not
                  form tight-knit communities. Next, we examine the
                  fine-grained behaviors of Sybils on Renren using
                  clickstream data. Third, we investigate
                  behind-the-scenes collusion between large groups of
                  Sybils. Our results reveal that Sybils with no explicit
                  social ties still act in concert to launch attacks.
                  Finally, we investigate enhanced techniques to identify
                  stealthy Sybils. In summary, our study advances the
                  understanding of Sybil behavior on OSNs and shows that
                  Sybils can effectively avoid existing community-based
                  Sybil detectors. We hope that our results will foster
                  new research on Sybil detection that is based on novel
                  types of Sybil features.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Jin:2014:SAR,
  author =       "Ruoming Jin and Victor E. Lee and Longjie Li",
  title =        "Scalable and axiomatic ranking of network role
                  similarity",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2518176",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "A key task in analyzing social networks and other
                  complex networks is role analysis: describing and
                  categorizing nodes according to how they interact with
                  other nodes. Two nodes have the same role if they
                  interact with equivalent sets of neighbors. The most
                  fundamental role equivalence is automorphic
                  equivalence. Unfortunately, the fastest algorithms
                  known for graph automorphism are nonpolynomial.
                  Moreover, since exact equivalence is rare, a more
                  meaningful task is measuring the role similarity
                  between any two nodes. This task is closely related to
                  the structural or link-based similarity problem that
                  SimRank addresses. However, SimRank and other existing
                  similarity measures are not sufficient because they do
                  not guarantee to recognize automorphically or
                  structurally equivalent nodes. This article makes two
                  contributions. First, we present and justify several
                  axiomatic properties necessary for a role similarity
                  measure or metric. Second, we present RoleSim, a new
                  similarity metric that satisfies these axioms and can
                  be computed with a simple iterative algorithm. We
                  rigorously prove that RoleSim satisfies all of these
                  axiomatic properties. We also introduce Iceberg
                  RoleSim, a scalable algorithm that discovers all pairs
                  with RoleSim scores above a user-defined threshold $
                  \theta $. We demonstrate the interpretative power of
                  RoleSim on both synthetic and real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Mcauley:2014:DSC,
  author =       "Julian McAuley and Jure Leskovec",
  title =        "Discovering social circles in ego networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556612",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "People's personal social networks are big and
                  cluttered, and currently there is no good way to
                  automatically organize them. Social networking sites
                  allow users to manually categorize their friends into
                  social circles (e.g., ``circles'' on Google+, and
                  ``lists'' on Facebook and Twitter). However, circles
                  are laborious to construct and must be manually updated
                  whenever a user's network grows. In this article, we
                  study the novel task of automatically identifying
                  users' social circles. We pose this task as a
                  multimembership node clustering problem on a user's ego
                  network, a network of connections between her friends.
                  We develop a model for detecting circles that combines
                  network structure as well as user profile information.
                  For each circle, we learn its members and the
                  circle-specific user profile similarity metric.
                  Modeling node membership to multiple circles allows us
                  to detect overlapping as well as hierarchically nested
                  circles. Experiments show that our model accurately
                  identifies circles on a diverse set of data from
                  Facebook, Google+, and Twitter, for all of which we
                  obtain hand-labeled ground truth.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Abrahao:2014:SFA,
  author =       "Bruno Abrahao and Sucheta Soundarajan and John
                  Hopcroft and Robert Kleinberg",
  title =        "A separability framework for analyzing community
                  structure",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2527231",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Four major factors govern the intricacies of community
                  extraction in networks: (1) the literature offers a
                  multitude of disparate community detection algorithms
                  whose output exhibits high structural variability
                  across the collection, (2) communities identified by
                  algorithms may differ structurally from real
                  communities that arise in practice, (3) there is no
                  consensus characterizing how to discriminate
                  communities from noncommunities, and (4) the
                  application domain includes a wide variety of networks
                  of fundamentally different natures. In this article, we
                  present a class separability framework to tackle these
                  challenges through a comprehensive analysis of
                  community properties. Our approach enables the
                  assessment of the structural dissimilarity among the
                  output of multiple community detection algorithms and
                  between the output of algorithms and communities that
                  arise in practice. In addition, our method provides us
                  with a way to organize the vast collection of community
                  detection algorithms by grouping those that behave
                  similarly. Finally, we identify the most discriminative
                  graph-theoretical properties of community signature and
                  the small subset of properties that account for most of
                  the biases of the different community detection
                  algorithms. We illustrate our approach with an
                  experimental analysis, which reveals nuances of the
                  structure of real and extracted communities. In our
                  experiments, we furnish our framework with the output
                  of 10 different community detection procedures,
                  representative of categories of popular algorithms
                  available in the literature, applied to a diverse
                  collection of large-scale real network datasets whose
                  domains span biology, online shopping, and social
                  systems. We also analyze communities identified by
                  annotations that accompany the data, which reflect
                  exemplar communities in various domain. We characterize
                  these communities using a broad spectrum of community
                  properties to produce the different structural classes.
                  As our experiments show that community structure is not
                  a universal concept, our framework enables an informed
                  choice of the most suitable community detection method
                  for identifying communities of a specific type in a
                  given network and allows for a comparison of existing
                  community detection algorithms while guiding the design
                  of new ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhong:2014:UBL,
  author =       "Erheng Zhong and Wei Fan and Qiang Yang",
  title =        "User behavior learning and transfer in composite
                  social networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556613",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Accurate prediction of user behaviors is important for
                  many social media applications, including social
                  marketing, personalization, and recommendation. A major
                  challenge lies in that although many previous works
                  model user behavior from only historical behavior logs,
                  the available user behavior data or interactions
                  between users and items in a given social network are
                  usually very limited and sparse (e.g., $ \geq 99.9 \% $
                  empty), which makes models overfit the rare
                  observations and fail to provide accurate predictions.
                  We observe that many people are members of several
                  social networks in the same time, such as Facebook,
                  Twitter, and Tencent's QQ. Importantly, users'
                  behaviors and interests in different networks influence
                  one another. This provides an opportunity to leverage
                  the knowledge of user behaviors in different networks
                  by considering the overlapping users in different
                  networks as bridges, in order to alleviate the data
                  sparsity problem, and enhance the predictive
                  performance of user behavior modeling. Combining
                  different networks ``simply and naively'' does not work
                  well. In this article, we formulate the problem to
                  model multiple networks as ``adaptive composite
                  transfer'' and propose a framework called ComSoc.
                  ComSoc first selects the most suitable networks inside
                  a composite social network via a hierarchical Bayesian
                  model, parameterized for individual users. It then
                  builds topic models for user behavior prediction using
                  both the relationships in the selected networks and
                  related behavior data. With different relational
                  regularization, we introduce different implementations,
                  corresponding to different ways to transfer knowledge
                  from composite social relations. To handle big data, we
                  have implemented the algorithm using Map/Reduce. We
                  demonstrate that the proposed composite network-based
                  user behavior models significantly improve the
                  predictive accuracy over a number of existing
                  approaches on several real-world applications,
                  including a very large social networking dataset from
                  Tencent Inc.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ahmed:2014:NSS,
  author =       "Nesreen K. Ahmed and Jennifer Neville and Ramana
                  Kompella",
  title =        "Network Sampling: From Static to Streaming Graphs",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601438",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Network sampling is integral to the analysis of
                  social, information, and biological networks. Since
                  many real-world networks are massive in size,
                  continuously evolving, and/or distributed in nature,
                  the network structure is often sampled in order to
                  facilitate study. For these reasons, a more thorough
                  and complete understanding of network sampling is
                  critical to support the field of network science. In
                  this paper, we outline a framework for the general
                  problem of network sampling by highlighting the
                  different objectives, population and units of interest,
                  and classes of network sampling methods. In addition,
                  we propose a spectrum of computational models for
                  network sampling methods, ranging from the
                  traditionally studied model based on the assumption of
                  a static domain to a more challenging model that is
                  appropriate for streaming domains. We design a family
                  of sampling methods based on the concept of graph
                  induction that generalize across the full spectrum of
                  computational models (from static to streaming) while
                  efficiently preserving many of the topological
                  properties of the input graphs. Furthermore, we
                  demonstrate how traditional static sampling algorithms
                  can be modified for graph streams for each of the three
                  main classes of sampling methods: node, edge, and
                  topology-based sampling. Experimental results indicate
                  that our proposed family of sampling methods more
                  accurately preserve the underlying properties of the
                  graph in both static and streaming domains. Finally, we
                  study the impact of network sampling algorithms on the
                  parameter estimation and performance evaluation of
                  relational classification algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ge:2014:RMA,
  author =       "Yong Ge and Guofei Jiang and Min Ding and Hui Xiong",
  title =        "Ranking Metric Anomaly in Invariant Networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601436",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The management of large-scale distributed information
                  systems relies on the effective use and modeling of
                  monitoring data collected at various points in the
                  distributed information systems. A traditional approach
                  to model monitoring data is to discover invariant
                  relationships among the monitoring data. Indeed, we can
                  discover all invariant relationships among all pairs of
                  monitoring data and generate invariant networks, where
                  a node is a monitoring data source (metric) and a link
                  indicates an invariant relationship between two
                  monitoring data. Such an invariant network
                  representation can help system experts to localize and
                  diagnose the system faults by examining those broken
                  invariant relationships and their related metrics,
                  since system faults usually propagate among the
                  monitoring data and eventually lead to some broken
                  invariant relationships. However, at one time, there
                  are usually a lot of broken links (invariant
                  relationships) within an invariant network. Without
                  proper guidance, it is difficult for system experts to
                  manually inspect this large number of broken links. To
                  this end, in this article, we propose the problem of
                  ranking metrics according to the anomaly levels for a
                  given invariant network, while this is a nontrivial
                  task due to the uncertainties and the complex nature of
                  invariant networks. Specifically, we propose two types
                  of algorithms for ranking metric anomaly by link
                  analysis in invariant networks. Along this line, we
                  first define two measurements to quantify the anomaly
                  level of each metric, and introduce the mRank
                  algorithm. Also, we provide a weighted score mechanism
                  and develop the gRank algorithm, which involves an
                  iterative process to obtain a score to measure the
                  anomaly levels. In addition, some extended algorithms
                  based on mRank and gRank algorithms are developed by
                  taking into account the probability of being broken as
                  well as noisy links. Finally, we validate all the
                  proposed algorithms on a large number of real-world and
                  synthetic data sets to illustrate the effectiveness and
                  efficiency of different algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2014:DGP,
  author =       "Gensheng Zhang and Xiao Jiang and Ping Luo and Min
                  Wang and Chengkai Li",
  title =        "Discovering General Prominent Streaks in Sequence
                  Data",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601439",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article studies the problem of prominent streak
                  discovery in sequence data. Given a sequence of values,
                  a prominent streak is a long consecutive subsequence
                  consisting of only large (small) values, such as
                  consecutive games of outstanding performance in sports,
                  consecutive hours of heavy network traffic, and
                  consecutive days of frequent mentioning of a person in
                  social media. Prominent streak discovery provides
                  insightful data patterns for data analysis in many
                  real-world applications and is an enabling technique
                  for computational journalism. Given its real-world
                  usefulness and complexity, the research on prominent
                  streaks in sequence data opens a spectrum of
                  challenging problems. A baseline approach to finding
                  prominent streaks is a quadratic algorithm that
                  exhaustively enumerates all possible streaks and
                  performs pairwise streak dominance comparison. For more
                  efficient methods, we make the observation that
                  prominent streaks are in fact skyline points in two
                  dimensions---streak interval length and minimum value in
                  the interval. Our solution thus hinges on the idea to
                  separate the two steps in prominent streak discovery:
                  candidate streak generation and skyline operation over
                  candidate streaks. For candidate generation, we propose
                  the concept of local prominent streak (LPS). We prove
                  that prominent streaks are a subset of LPSs and the
                  number of LPSs is less than the length of a data
                  sequence, in comparison with the quadratic number of
                  candidates produced by the brute-force baseline method.
                  We develop efficient algorithms based on the concept of
                  LPS. The nonlinear local prominent streak (NLPS)-based
                  method considers a superset of LPSs as candidates, and
                  the linear local prominent streak (LLPS)-based method
                  further guarantees to consider only LPSs. The proposed
                  properties and algorithms are also extended for
                  discovering general top-$k$, multisequence, and
                  multidimensional prominent streaks. The results of
                  experiments using multiple real datasets verified the
                  effectiveness of the proposed methods and showed orders
                  of magnitude performance improvement against the
                  baseline method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Schifanella:2014:MTD,
  author =       "Claudio Schifanella and K. Sel{\c{c}}uk Candan and
                  Maria Luisa Sapino",
  title =        "Multiresolution Tensor Decompositions with Mode
                  Hierarchies",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2532169",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Tensors (multidimensional arrays) are widely used for
                  representing high-order dimensional data, in
                  applications ranging from social networks, sensor data,
                  and Internet traffic. Multiway data analysis
                  techniques, in particular tensor decompositions, allow
                  extraction of hidden correlations among multiway data
                  and thus are key components of many data analysis
                  frameworks. Intuitively, these algorithms can be
                  thought of as multiway clustering schemes, which
                  consider multiple facets of the data in identifying
                  clusters, their weights, and contributions of each data
                  element. Unfortunately, algorithms for fitting multiway
                  models are, in general, iterative and very time
                  consuming. In this article, we observe that, in many
                  applications, there is a priori background knowledge
                  (or metadata) about one or more domain dimensions. This
                  metadata is often in the form of a hierarchy that
                  clusters the elements of a given data facet (or mode).
                  We investigate whether such single-mode data
                  hierarchies can be used to boost the efficiency of
                  tensor decomposition process, without significant
                  impact on the final decomposition quality. We consider
                  each domain hierarchy as a guide to help provide
                  higher- or lower-resolution views of the data in the
                  tensor on demand and we rely on these metadata-induced
                  multiresolution tensor representations to develop a
                  multiresolution approach to tensor decomposition. In
                  this article, we focus on an alternating least squares
                  (ALS)--based implementation of the two most important
                  decomposition models such as the PARAllel FACtors
                  (PARAFAC, which decomposes a tensor into a diagonal
                  tensor and a set of factor matrices) and the Tucker
                  (which produces as result a core tensor and a set of
                  dimension-subspaces matrices). Experiment results show
                  that, when the available metadata is used as a rough
                  guide, the proposed multiresolution method helps fit
                  both PARAFAC and Tucker models with consistent (under
                  different parameters settings) savings in execution
                  time and memory consumption, while preserving the
                  quality of the decomposition.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2014:RMN,
  author =       "Jin Huang and Feiping Nie and Heng Huang and Chris
                  Ding",
  title =        "Robust Manifold Nonnegative Matrix Factorization",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601434",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Nonnegative Matrix Factorization (NMF) has been one of
                  the most widely used clustering techniques for
                  exploratory data analysis. However, since each data
                  point enters the objective function with squared
                  residue error, a few outliers with large errors easily
                  dominate the objective function. In this article, we
                  propose a Robust Manifold Nonnegative Matrix
                  Factorization (RMNMF) method using $ \ell_{2, 1} $-norm
                  and integrating NMF and spectral clustering under the
                  same clustering framework. We also point out the
                  solution uniqueness issue for the existing NMF methods
                  and propose an additional orthonormal constraint to
                  address this problem. With the new constraint, the
                  conventional auxiliary function approach no longer
                  works. We tackle this difficult optimization problem
                  via a novel Augmented Lagrangian Method (ALM)--based
                  algorithm and convert the original constrained
                  optimization problem on one variable into a
                  multivariate constrained problem. The new objective
                  function then can be decomposed into several
                  subproblems that each has a closed-form solution. More
                  importantly, we reveal the connection of our method
                  with robust $K$-means and spectral clustering, and we
                  demonstrate its theoretical significance. Extensive
                  experiments have been conducted on nine benchmark
                  datasets, and all empirical results show the
                  effectiveness of our method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2014:RAL,
  author =       "Yu Zhang and Dit-Yan Yeung",
  title =        "A Regularization Approach to Learning Task
                  Relationships in Multitask Learning",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2538028",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multitask learning is a learning paradigm that seeks
                  to improve the generalization performance of a learning
                  task with the help of some other related tasks. In this
                  article, we propose a regularization approach to
                  learning the relationships between tasks in multitask
                  learning. This approach can be viewed as a novel
                  generalization of the regularized formulation for
                  single-task learning. Besides modeling positive task
                  correlation, our approach---multitask relationship
                  learning (MTRL)---can also describe negative task
                  correlation and identify outlier tasks based on the
                  same underlying principle. By utilizing a
                  matrix-variate normal distribution as a prior on the
                  model parameters of all tasks, our MTRL method has a
                  jointly convex objective function. For efficiency, we
                  use an alternating method to learn the optimal model
                  parameters for each task as well as the relationships
                  between tasks. We study MTRL in the symmetric multitask
                  learning setting and then generalize it to the
                  asymmetric setting as well. We also discuss some
                  variants of the regularization approach to demonstrate
                  the use of other matrix-variate priors for learning
                  task relationships. Moreover, to gain more insight into
                  our model, we also study the relationships between MTRL
                  and some existing multitask learning methods.
                  Experiments conducted on a toy problem as well as
                  several benchmark datasets demonstrate the
                  effectiveness of MTRL as well as its high
                  interpretability revealed by the task covariance
                  matrix.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2014:SCR,
  author =       "Ming Lin and Shifeng Weng and Changshui Zhang",
  title =        "On the Sample Complexity of Random {Fourier} Features
                  for Online Learning: How Many Random {Fourier} Features
                  Do We Need?",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2611378",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                  https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We study the sample complexity of random Fourier
                  features for online kernel learning---that is, the
                  number of random Fourier features required to achieve
                  good generalization performance. We show that when the
                  loss function is strongly convex and smooth, online
                  kernel learning with random Fourier features can
                  achieve an $ O(\log T / T) $ bound for the excess risk
                  with only $ O(1 / \lambda^2) $ random Fourier features,
                  where $T$ is the number of training examples and
                  $ \lambda $ is the modulus of strong convexity. This is
                  a significant improvement compared to the existing
                  result for batch kernel learning that requires $ O(T) $
                  random Fourier features to achieve a generalization
                  bound $ O(1 / \sqrt T) $. Our empirical study verifies
                  that online kernel learning with a limited number of
                  random Fourier features can achieve similar
                  generalization performance as online learning using
                  full kernel matrix. We also present an enhanced online
                  learning algorithm with random Fourier features that
                  improves the classification performance by multiple
                  passes of training examples and a partial average.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                  (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Eyal:2014:PIM,
  author =       "Ron Eyal and Avi Rosenfeld and Sigal Sina and Sarit
                 Kraus",
  title =        "Predicting and Identifying Missing Node Information in
                 Social Networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536775",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:23 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In recent years, social networks have surged in
                 popularity. One key aspect of social network research
                 is identifying important missing information that is
                 not explicitly represented in the network, or is not
                 visible to all. To date, this line of research
                 typically focused on finding the connections that are
                 missing between nodes, a challenge typically termed as
                 the link prediction problem. This article introduces
                 the missing node identification problem, where missing
                 members in the social network structure must be
                 identified. In this problem, indications of missing
                 nodes are assumed to exist. Given these indications and
                 a partial network, we must assess which indications
                 originate from the same missing node and determine the
                 full network structure. Toward solving this problem, we
                 present the missing node identification by spectral
                 clustering algorithm (MISC), an approach based on a
                 spectral clustering algorithm, combined with nodes'
                 pairwise affinity measures that were adopted from link
                 prediction research. We evaluate the performance of our
                 approach in different problem settings and scenarios,
                 using real-life data from Facebook. The results show
                 that our approach has beneficial results and can be
                 effective in solving the missing node identification
                 problem. In addition, this article also presents
                 R-MISC, which uses a sparse matrix representation,
                 efficient algorithms for calculating the nodes'
                 pairwise affinity, and a proprietary dimension
                 reduction technique to enable scaling the MISC
                 algorithm to large networks of more than 100,000 nodes.
                 Last, we consider problem settings where some of the
                 indications are unknown. Two algorithms are suggested
                 for this problem: speculative MISC, based on MISC, and
                 missing link completion, based on classical link
                 prediction literature. We show that speculative MISC
                 outperforms missing link completion.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Webb:2014:EDM,
  author =       "Geoffrey I. Webb and Jilles Vreeken",
  title =        "Efficient Discovery of the Most Interesting
                 Associations",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601433",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:23 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Self-sufficient itemsets have been proposed as an
                 effective approach to summarizing the key associations
                 in data. However, their computation appears highly
                 demanding, as assessing whether an itemset is
                 self-sufficient requires consideration of all pairwise
                 partitions of the itemset into pairs of subsets as well
                 as consideration of all supersets. This article
                 presents the first published algorithm for efficiently
                 discovering self-sufficient itemsets. This
                 branch-and-bound algorithm deploys two powerful pruning
                 mechanisms based on upper bounds on itemset value and
                 statistical significance level. It demonstrates that
                 finding top-$k$ productive and nonredundant itemsets,
                 with postprocessing to identify those that are not
                 independently productive, can efficiently identify
                 small sets of key associations. We present extensive
                 evaluation of the strengths and limitations of the
                 technique, including comparisons with alternative
                 approaches to finding the most interesting
                 associations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Shabtai:2014:ODM,
  author =       "Asaf Shabtai and Maya Bercovitch and Lior Rokach and
                 Yuval Elovici",
  title =        "Optimizing Data Misuse Detection",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2611520",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Data misuse may be performed by entities such as an
                 organization's employees and business partners who are
                 granted access to sensitive information and misuse
                 their privileges. We assume that users can be either
                 trusted or untrusted. The access of untrusted parties
                 to data objects (e.g., client and patient records)
                 should be monitored in an attempt to detect misuse.
                 However, monitoring data objects is resource intensive
                 and time-consuming and may also cause disturbance or
                 inconvenience to the involved employees. Therefore, the
                 monitored data objects should be carefully selected. In
                 this article, we present two optimization problems
                 carefully designed for selecting specific data objects
                 for monitoring, such that the detection rate is
                 maximized and the monitoring effort is minimized. In
                 the first optimization problem, the goal is to select
                 data objects for monitoring that are accessed by at
                 most c trusted agents while ensuring access to at least
                 k monitored objects by each untrusted agent (both c and
                 k are integer variable). As opposed to the first
                 optimization problem, the goal of the second
                 optimization problem is to select monitored data
                 objects that maximize the number of monitored data
                 objects accessed by untrusted agents while ensuring
                 that each trusted agent does not access more than d
                 monitored data objects (d is an integer variable as
                 well). Two efficient heuristic algorithms for solving
                 these optimization problems are proposed, and
                 experiments were conducted simulating different
                 scenarios to evaluate the algorithms' performance.
                 Moreover, we compared the heuristic algorithms'
                 performance to the optimal solution and conducted
                 sensitivity analysis on the three parameters (c, k, and
                 d) and on the ratio between the trusted and untrusted
                 agents.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Hernandez-Orallo:2014:PRC,
  author =       "Jos{\'e} Hern{\'a}ndez-Orallo",
  title =        "Probabilistic Reframing for Cost-Sensitive
                 Regression",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "17:1--17:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2641758",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:02 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Common-day applications of predictive models usually
                 involve the full use of the available contextual
                 information. When the operating context changes, one
                 may fine-tune the by-default (incontextual) prediction
                 or may even abstain from predicting a value (a reject).
                 Global reframing solutions, where the same function is
                 applied to adapt the estimated outputs to a new cost
                 context, are possible solutions here. An alternative
                 approach, which has not been studied in a comprehensive
                 way for regression in the knowledge discovery and data
                 mining literature, is the use of a local (e.g.,
                 probabilistic) reframing approach, where decisions are
                 made according to the estimated output and a
                 reliability, confidence, or probability estimation. In
                 this article, we advocate for a simple two-parameter
                 (mean and variance) approach, working with a normal
                 conditional probability density. Given the conditional
                 mean produced by any regression technique, we develop
                 lightweight ``enrichment'' methods that produce good
                 estimates of the conditional variance, which are used
                 by the probabilistic (local) reframing methods. We
                 apply these methods to some very common families of
                 cost-sensitive problems, such as optimal predictions in
                 (auction) bids, asymmetric loss scenarios, and
                 rejection rules.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Miettinen:2014:MMD,
  author =       "Pauli Miettinen and Jilles Vreeken",
  title =        "{MDL4BMF}: Minimum Description Length for {Boolean}
                 Matrix Factorization",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601437",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:45:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Matrix factorizations---where a given data matrix is
                 approximated by a product of two or more factor
                 matrices---are powerful data mining tools. Among other
                 tasks, matrix factorizations are often used to separate
                 global structure from noise. This, however, requires
                 solving the ``model order selection problem'' of
                 determining the proper rank of the factorization, that
                 is, to answer where fine-grained structure stops, and
                 where noise starts. Boolean Matrix Factorization
                 (BMF)---where data, factors, and matrix product are
                 Boolean---has in recent years received increased
                 attention from the data mining community. The technique
                 has desirable properties, such as high interpretability
                 and natural sparsity. Yet, so far no method for
                 selecting the correct model order for BMF has been
                 available. In this article, we propose the use of the
                 Minimum Description Length (MDL) principle for this
                 task. Besides solving the problem, this well-founded
                 approach has numerous benefits; for example, it is
                 automatic, does not require a likelihood function, is
                 fast, and, as experiments show, is highly accurate. We
                 formulate the description length function for BMF in
                 general---making it applicable for any BMF algorithm.
                 We discuss how to construct an appropriate encoding:
                 starting from a simple and intuitive approach, we
                 arrive at a highly efficient data-to-model--based
                 encoding for BMF. We extend an existing algorithm for
                 BMF to use MDL to identify the best Boolean matrix
                 factorization, analyze the complexity of the problem,
                 and perform an extensive experimental evaluation to
                 study its behavior.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Tang:2014:FSS,
  author =       "Jiliang Tang and Huan Liu",
  title =        "Feature Selection for Social Media Data",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629587",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:45:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Feature selection is widely used in preparing
                 high-dimensional data for effective data mining. The
                 explosive popularity of social media produces massive
                 and high-dimensional data at an unprecedented rate,
                 presenting new challenges to feature selection. Social
                 media data consists of (1) traditional
                 high-dimensional, attribute-value data such as posts,
                 tweets, comments, and images, and (2) linked data that
                 provides social context for posts and describes the
                 relationships between social media users as well as who
                 generates the posts, and so on. The nature of social
                 media also determines that its data is massive, noisy,
                 and incomplete, which exacerbates the already
                 challenging problem of feature selection. In this
                 article, we study a novel feature selection problem of
                 selecting features for social media data with its
                 social context. In detail, we illustrate the
                 differences between attribute-value data and social
                 media data, investigate if linked data can be exploited
                 in a new feature selection framework by taking
                 advantage of social science theories. We design and
                 conduct experiments on datasets from real-world social
                 media Web sites, and the empirical results demonstrate
                 that the proposed framework can significantly improve
                 the performance of feature selection. Further
                 experiments are conducted to evaluate the effects of
                 user--user and user--post relationships manifested in
                 linked data on feature selection, and research issues
                 for future work will be discussed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Riondato:2014:EDA,
  author =       "Matteo Riondato and Eli Upfal",
  title =        "Efficient Discovery of Association Rules and Frequent
                 Itemsets through Sampling with Tight Performance
                 Guarantees",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629586",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:45:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The tasks of extracting (top-$K$) Frequent Itemsets
                 (FIs) and Association Rules (ARs) are fundamental
                 primitives in data mining and database applications.
                 Exact algorithms for these problems exist and are
                 widely used, but their running time is hindered by the
                 need of scanning the entire dataset, possibly multiple
                 times. High-quality approximations of FIs and ARs are
                 sufficient for most practical uses. Sampling techniques
                 can be used for fast discovery of approximate
                 solutions, but works exploring this technique did not
                 provide satisfactory performance guarantees on the
                 quality of the approximation due to the difficulty of
                 bounding the probability of under- or oversampling any
                 one of an unknown number of frequent itemsets. We
                 circumvent this issue by applying the statistical
                 concept of Vapnik--Chervonenkis (VC) dimension to
                 develop a novel technique for providing tight bounds on
                 the sample size that guarantees approximation of the
                 (top-$K$) FIs and ARs within user-specified parameters.
                 The resulting sample size is linearly dependent on the
                 VC-dimension of a range space associated with the
                 dataset. We analyze the VC-dimension of this range
                 space and show that it is upper bounded by an
                 easy-to-compute characteristic quantity of the dataset,
                 the d-index, namely, the maximum integer d such that
                 the dataset contains at least d transactions of length
                 at least d such that no one of them is a superset of or
                 equal to another. We show that this bound is tight for
                 a large class of datasets. The resulting sample size is
                 a significant improvement over previous known results.
                 We present an extensive experimental evaluation of our
                 technique on real and artificial datasets,
                 demonstrating the practicality of our methods, and
                 showing that they achieve even higher quality
                 approximations than what is guaranteed by the
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Burton:2014:DSC,
  author =       "Scott H. Burton and Christophe G. Giraud-Carrier",
  title =        "Discovering Social Circles in Directed Graphs",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "21:1--21:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2641759",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:02 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We examine the problem of identifying social circles,
                 or sets of cohesive and mutually aware nodes
                 surrounding an initial query set, in directed graphs
                 where the complete graph is not known beforehand. This
                 problem differs from local community mining, in that
                 the query set defines the circle of interest. We
                 explicitly handle edge direction, as in many cases
                 relationships are not symmetric, and focus on the local
                 context because many real-world graphs cannot be
                 feasibly known. We outline several issues that are
                 unique to this context, introduce a quality function to
                 measure the value of including a particular node in an
                 emerging social circle, and describe a greedy social
                 circle discovery algorithm. We demonstrate the
                 effectiveness of this approach on artificial
                 benchmarks, large networks with topical community
                 labels, and several real-world case studies.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Paul:2014:RPL,
  author =       "Saurabh Paul and Christos Boutsidis and Malik
                 Magdon-Ismail and Petros Drineas",
  title =        "Random Projections for Linear Support Vector
                 Machines",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "22:1--22:??",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2641760",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:45:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Let $X$ be a data matrix of rank $ \rho $, whose rows
                 represent $n$ points in $d$-dimensional space. The
                 linear support vector machine constructs a hyperplane
                 separator that maximizes the 1-norm soft margin. We
                 develop a new oblivious dimension reduction technique
                 that is precomputed and can be applied to any input
                 matrix $X$. We prove that, with high probability, the
                 margin and minimum enclosing ball in the feature space
                 are preserved to within $ \epsilon $-relative error,
                 ensuring comparable generalization as in the original
                 space in the case of classification. For regression, we
                 show that the margin is preserved to $ \epsilon
                 $-relative error with high probability. We present
                 extensive experiments with real and synthetic data to
                 support our theory.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Erdo:2014:RGN,
  author =       "D{\'o}ra Erd{\H{o}}s and Rainer Gemulla and Evimaria
                 Terzi",
  title =        "Reconstructing Graphs from Neighborhood Data",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "23:1--23:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2641761",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:02 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Consider a social network and suppose that we are only
                 given the number of common friends between each pair of
                 users. Can we reconstruct the underlying network?
                 Similarly, consider a set of documents and the words
                 that appear in them. If we only know the number of
                 common words for every pair of documents, as well as
                 the number of common documents for every pair of words,
                 can we infer which words appear in which documents? In
                 this article, we develop a general methodology for
                 answering questions like these. We formalize these
                 questions in what we call the {\em R}econstruct
                 problem: given information about the common neighbors
                 of nodes in a network, our goal is to reconstruct the
                 hidden binary matrix that indicates the presence or
                 absence of relationships between individual nodes. In
                 fact, we propose two different variants of this
                 problem: one where the number of connections of every
                 node (i.e., the degree of every node) is known and a
                 second one where it is unknown. We call these variants
                 the degree-aware and the degree-oblivious versions of
                 the Reconstruct problem, respectively. Our algorithms
                 for both variants exploit the properties of the
                 singular value decomposition of the hidden binary
                 matrix. More specifically, we show that using the
                 available neighborhood information, we can reconstruct
                 the hidden matrix by finding the components of its
                 singular value decomposition and then combining them
                 appropriately. Our extensive experimental study
                 suggests that our methods are able to reconstruct
                 binary matrices of different characteristics with up to
                 100\% accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Acharya:2014:OFC,
  author =       "Ayan Acharya and Eduardo R. Hruschka and Joydeep Ghosh
                 and Sreangsu Acharyya",
  title =        "An Optimization Framework for Combining Ensembles of
                 Classifiers and Clusterers with Applications to
                 Nontransductive Semisupervised Learning and Transfer
                 Learning",
  journal =      j-TKDD,
  volume =       "9",
  number =       "1",
  pages =        "1:1--1:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601435",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:05 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Unsupervised models can provide supplementary soft
                 constraints to help classify new ``target'' data
                 because similar instances in the target set are more
                 likely to share the same class label. Such models can
                 also help detect possible differences between training
                 and target distributions, which is useful in
                 applications where concept drift may take place, as in
                 transfer learning settings. This article describes a
                 general optimization framework that takes as input
                 class membership estimates from existing classifiers
                 learned on previously encountered ``source'' (or
                 training) data, as well as a similarity matrix from a
                 cluster ensemble operating solely on the target (or
                 test) data to be classified, and yields a consensus
                 labeling of the target data. More precisely, the
                 application settings considered are nontransductive
                 semisupervised and transfer learning scenarios where
                 the training data are used only to build an ensemble of
                 classifiers and are subsequently discarded before
                 classifying the target data. The framework admits a
                 wide range of loss functions and
                 classification/clustering methods. It exploits
                 properties of Bregman divergences in conjunction with
                 Legendre duality to yield a principled and scalable
                 approach. A variety of experiments show that the
                 proposed framework can yield results substantially
                 superior to those provided by na{\"\i}vely applying
                 classifiers learned on the original task to the target
                 data. In addition, we show that the proposed approach,
                 even not being conceptually transductive, can provide
                 better results compared to some popular transductive
                 learning techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Boedihardjo:2014:FEL,
  author =       "Arnold P. Boedihardjo and Chang-Tien Lu and Bingsheng
                 Wang",
  title =        "A Framework for Exploiting Local Information to
                 Enhance Density Estimation of Data Streams",
  journal =      j-TKDD,
  volume =       "9",
  number =       "1",
  pages =        "2:1--2:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629618",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:05 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The Probability Density Function (PDF) is the
                 fundamental data model for a variety of stream mining
                 algorithms. Existing works apply the standard
                 nonparametric Kernel Density Estimator (KDE) to
                 approximate the PDF of data streams. As a result, the
                 stream-based KDEs cannot accurately capture complex
                 local density features. In this article, we propose the
                 use of Local Region (LRs) to model local density
                 information in univariate data streams. In-depth
                 theoretical analyses are presented to justify the
                 effectiveness of the LR-based KDE. Based on the
                 analyses, we develop the General Local rEgion AlgorithM
                 (GLEAM) to enhance the estimation quality of
                 structurally complex univariate distributions for
                 existing stream-based KDEs. A set of algorithmic
                 optimizations is designed to improve the query
                 throughput of GLEAM and to achieve its linear order
                 computation. Additionally, a comprehensive suite of
                 experiments was conducted to test the effectiveness and
                 efficiency of GLEAM.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ordonez:2014:BVS,
  author =       "Carlos Ordonez and Carlos Garcia-Alvarado and
                 Veerabhadran Baladandayuthapani",
  title =        "{Bayesian} Variable Selection in Linear Regression in
                 One Pass for Large Datasets",
  journal =      j-TKDD,
  volume =       "9",
  number =       "1",
  pages =        "3:1--3:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629617",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:05 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Bayesian models are generally computed with Markov
                 Chain Monte Carlo (MCMC) methods. The main disadvantage
                 of MCMC methods is the large number of iterations they
                 need to sample the posterior distributions of model
                 parameters, especially for large datasets. On the other
                 hand, variable selection remains a challenging problem
                 due to its combinatorial search space, where Bayesian
                 models are a promising solution. In this work, we study
                 how to accelerate Bayesian model computation for
                 variable selection in linear regression. We propose a
                 fast Gibbs sampler algorithm, a widely used MCMC method
                 that incorporates several optimizations. We use a
                 Zellner prior for the regression coefficients, an
                 improper prior on variance, and a conjugate prior
                 Gaussian distribution, which enable dataset
                 summarization in one pass, thus exploiting an augmented
                 set of sufficient statistics. Thereafter, the algorithm
                 iterates in main memory. Sufficient statistics are
                 indexed with a sparse binary vector to efficiently
                 compute matrix projections based on selected variables.
                 Discovered variable subsets probabilities, selecting
                 and discarding each variable, are stored on a hash
                 table for fast retrieval in future iterations. We study
                 how to integrate our algorithm into a Database
                 Management System (DBMS), exploiting aggregate
                 User-Defined Functions for parallel data summarization
                 and stored procedures to manipulate matrices with
                 arrays. An experimental evaluation with real datasets
                 evaluates accuracy and time performance, comparing our
                 DBMS-based algorithm with the R package. Our algorithm
                 is shown to produce accurate results, scale linearly on
                 dataset size, and run orders of magnitude faster than
                 the R package.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Fei:2014:SSB,
  author =       "Hongliang Fei and Jun Huan",
  title =        "Structured Sparse Boosting for Graph Classification",
  journal =      j-TKDD,
  volume =       "9",
  number =       "1",
  pages =        "4:1--4:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629328",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:05 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Boosting is a highly effective algorithm that produces
                 a linear combination of weak classifiers (a.k.a. base
                 learners) to obtain high-quality classification models.
                 In this article, we propose a generalized logit boost
                 algorithm in which base learners have structural
                 relationships in the functional space. Although such
                 relationships are generic, our work is particularly
                 motivated by the emerging topic of pattern-based
                 classification for semistructured data including
                 graphs. Toward an efficient incorporation of the
                 structure information, we have designed a general model
                 in which we use an undirected graph to capture the
                 relationship of subgraph-based base learners. In our
                 method, we employ both L$_1$ and Laplacian-based L$_2$
                 regularization to logit boosting to achieve model
                 sparsity and smoothness in the functional space spanned
                 by the base learners. We have derived efficient
                 optimization algorithms based on coordinate descent for
                 the new boosting formulation and theoretically prove
                 that it exhibits a natural grouping effect for nearby
                 spatial or overlapping base learners and that the
                 resulting estimator is consistent. Additionally,
                 motivated by the connection between logit boosting and
                 logistic regression, we extend our structured sparse
                 regularization framework to logistic regression for
                 vectorial data in which features are structured. Using
                 comprehensive experimental study and comparing our work
                 with the state-of-the-art, we have demonstrated the
                 effectiveness of the proposed learning method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2014:GGB,
  author =       "Zhiqiang Xu and Yiping Ke and Yi Wang and Hong Cheng
                 and James Cheng",
  title =        "{GBAGC}: A General {Bayesian} Framework for Attributed
                 Graph Clustering",
  journal =      j-TKDD,
  volume =       "9",
  number =       "1",
  pages =        "5:1--5:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629616",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:05 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Graph clustering, also known as community detection,
                 is a long-standing problem in data mining. In recent
                 years, with the proliferation of rich attribute
                 information available for objects in real-world graphs,
                 how to leverage not only structural but also attribute
                 information for clustering attributed graphs becomes a
                 new challenge. Most existing works took a
                 distance-based approach. They proposed various distance
                 measures to fuse structural and attribute information
                 and then applied standard techniques for graph
                 clustering based on these distance measures. In this
                 article, we take an alternative view and propose a
                 novel Bayesian framework for attributed graph
                 clustering. Our framework provides a general and
                 principled solution to modeling both the structural and
                 the attribute aspects of a graph. It avoids the
                 artificial design of a distance measure in existing
                 methods and, furthermore, can seamlessly handle graphs
                 with different types of edges and vertex attributes. We
                 develop an efficient variational method for graph
                 clustering under this framework and derive two concrete
                 algorithms for clustering unweighted and weighted
                 attributed graphs. Experimental results on large
                 real-world datasets show that our algorithms
                 significantly outperform the state-of-the-art
                 distance-based method, in terms of both effectiveness
                 and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Coscia:2014:UHO,
  author =       {Michele Coscia and Giulio Rossetti and Fosca Giannotti
                 and Dino Pedreschi},
  title =        {Uncovering Hierarchical and Overlapping Communities
                 with a Local-First Approach},
  journal =      j-TKDD,
  volume =       {9},
  number =       {1},
  pages =        {6:1--6:??},
  month =        aug,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2629511},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Aug 26 17:49:05 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Community discovery in complex networks is the task of
                 organizing a network's structure by grouping together
                 nodes related to each other. Traditional approaches are
                 based on the assumption that there is a global-level
                 organization in the network. However, in many
                 scenarios, each node is the bearer of complex
                 information and cannot be classified in disjoint
                 clusters. The top-down global view of the partition
                 approach is not designed for this. Here, we represent
                 this complex information as multiple latent labels, and
                 we postulate that edges in the networks are created
                 among nodes carrying similar labels. The latent labels
                 are the communities a node belongs to and we discover
                 them with a simple local-first approach to community
                 discovery. This is achieved by democratically letting
                 each node vote for the communities it sees surrounding
                 it in its limited view of the global system, its ego
                 neighborhood, using a label propagation algorithm,
                 assuming that each node is aware of the label it shares
                 with each of its connections. The local communities are
                 merged hierarchically, unveiling the modular
                 organization of the network at the global level and
                 identifying overlapping groups and groups of groups. We
                 tested this intuition against the state-of-the-art
                 overlapping community discovery and found that our new
                 method advances in the chosen scenarios in the quality
                 of the obtained communities. We perform a test on
                 benchmark and on real-world networks, evaluating the
                 quality of the community coverage by using the
                 extracted communities to predict the metadata attached
                 to the nodes, which we consider external information
                 about the latent labels. We also provide an explanation
                 about why real-world networks contain overlapping
                 communities and how our logic is able to capture them.
                 Finally, we show how our method is deterministic, is
                 incremental, and has a limited time complexity, so that
                 it can be used on real-world scale networks.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {6},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2014:GML,
  author =       {Guangtao Wang and Qinbao Song and Xueying Zhang and
                 Kaiyuan Zhang},
  title =        {A Generic Multilabel Learning-Based Classification
                 Algorithm Recommendation Method},
  journal =      j-TKDD,
  volume =       {9},
  number =       {1},
  pages =        {7:1--7:??},
  month =        oct,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2629474},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Fri Oct 10 17:19:10 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {As more and more classification algorithms continue to
                 be developed, recommending appropriate algorithms to a
                 given classification problem is increasingly important.
                 This article first distinguishes the algorithm
                 recommendation methods by two dimensions: (1)
                 meta-features, which are a set of measures used to
                 characterize the learning problems, and (2)
                 meta-target, which represents the relative performance
                 of the classification algorithms on the learning
                 problem. In contrast to the existing algorithm
                 recommendation methods whose meta-target is usually in
                 the form of either the ranking of candidate algorithms
                 or a single algorithm, this article proposes a new and
                 natural multilabel form to describe the meta-target.
                 This is due to the fact that there would be multiple
                 algorithms being appropriate for a given problem in
                 practice. Furthermore, a novel multilabel
                 learning-based generic algorithm recommendation method
                 is proposed, which views the algorithm recommendation
                 as a multilabel learning problem and solves the problem
                 by the mature multilabel learning algorithms. To
                 evaluate the proposed multilabel learning-based
                 recommendation method, extensive experiments with 13
                 well-known classification algorithms, two kinds of
                 meta-targets such as algorithm ranking and single
                 algorithm, and five different kinds of meta-features
                 are conducted on 1,090 benchmark learning problems. The
                 results show the effectiveness of our proposed
                 multilabel learning-based recommendation method.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {7},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2014:EEM,
author = "Pinghui Wang and John C. S. Lui and Bruno Ribeiro and
Don Towsley and Junzhou Zhao and Xiaohong Guan",
title = "Efficiently Estimating Motif Statistics of Large
Networks",
journal = j-TKDD,
volume = "9",
number = "2",
pages = "8:1--8:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629564",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Oct 7 18:49:26 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Exploring statistics of locally connected subgraph
patterns (also known as network motifs) has helped
researchers better understand the structure and
function of biological and Online Social Networks
(OSNs). Nowadays, the massive size of some critical
networks---often stored in already overloaded relational
databases---effectively limits the rate at which nodes
and edges can be explored, making it a challenge to
accurately discover subgraph statistics. In this work,
we propose sampling methods to accurately estimate
subgraph statistics from as few queried nodes as
possible. We present sampling algorithms that
efficiently and accurately estimate subgraph properties
of massive networks. Our algorithms require no
precomputation or complete network topology
information. At the same time, we provide theoretical
guarantees of convergence. We perform experiments using
widely known datasets and show that, for the same
accuracy, our algorithms require an order of magnitude
less queries (samples) than the current
state-of-the-art algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zheng:2014:FHE,
  author =       {Li Zheng and Tao Li and Chris Ding},
  title =        {A Framework for Hierarchical Ensemble Clustering},
  journal =      j-TKDD,
  volume =       {9},
  number =       {2},
  pages =        {9:1--9:??},
  month =        sep,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2611380},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Oct 7 18:49:26 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Ensemble clustering, as an important extension of the
                 clustering problem, refers to the problem of combining
                 different (input) clusterings of a given dataset to
                 generate a final (consensus) clustering that is a
                 better fit in some sense than existing clusterings.
                 Over the past few years, many ensemble clustering
                 approaches have been developed. However, most of them
                 are designed for partitional clustering methods, and
                 few research efforts have been reported for ensemble
                 hierarchical clustering methods. In this article, a
                 hierarchical ensemble clustering framework that can
                 naturally combine both partitional clustering and
                 hierarchical clustering results is proposed. In
                 addition, a novel method for learning the ultra-metric
                 distance from the aggregated distance matrices and
                 generating final hierarchical clustering with enhanced
                 cluster separation is developed based on the
                 ultra-metric distance for hierarchical clustering. We
                 study three important problems: dendrogram description,
                 dendrogram combination, and dendrogram selection. We
                 develop two approaches for dendrogram selection based
                 on tree distances, and we investigate various
                 dendrogram distances for representing dendrograms. We
                 provide a systematic empirical study of the ensemble
                 hierarchical clustering problem. Experimental results
                 demonstrate the effectiveness of our proposed
                 approaches.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {9},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Huai:2014:TPC,
  author =       {Baoxing Huai and Enhong Chen and Hengshu Zhu and Hui
                 Xiong and Tengfei Bao and Qi Liu and Jilei Tian},
  title =        {Toward Personalized Context Recognition for Mobile
                 Users: a Semisupervised {Bayesian} {HMM} Approach},
  journal =      j-TKDD,
  volume =       {9},
  number =       {2},
  pages =        {10:1--10:??},
  month =        sep,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2629504},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Oct 7 18:49:26 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {The problem of mobile context recognition targets the
                 identification of semantic meaning of context in a
                 mobile environment. This plays an important role in
                 understanding mobile user behaviors and thus provides
                 the opportunity for the development of better
                 intelligent context-aware services. A key step of
                 context recognition is to model the personalized
                 contextual information of mobile users. Although many
                 studies have been devoted to mobile context modeling,
                 limited efforts have been made on the exploitation of
                 the sequential and dependency characteristics of mobile
                 contextual information. Also, the latent semantics
                 behind mobile context are often ambiguous and poorly
                 understood. Indeed, a promising direction is to
                 incorporate some domain knowledge of common contexts,
                 such as ``waiting for a bus'' or ``having dinner,'' by
                 modeling both labeled and unlabeled context data from
                 mobile users because there are often few labeled
                 contexts available in practice. To this end, in this
                 article, we propose a sequence-based semisupervised
                 approach to modeling personalized context for mobile
                 users. Specifically, we first exploit the Bayesian
                 Hidden Markov Model (B-HMM) for modeling context in the
                 form of probabilistic distributions and transitions of
                 raw context data. Also, we propose a sequential model
                 by extending B-HMM with the prior knowledge of
                 contextual features to model context more accurately.
                 Then, to efficiently learn the parameters and initial
                 values of the proposed models, we develop a novel
                 approach for parameter estimation by integrating the
                 Dirichlet Process Mixture (DPM) model and the Mixture
                 Unigram (MU) model. Furthermore, by incorporating both
                 user-labeled and unlabeled data, we propose a
                 semisupervised learning-based algorithm to identify and
                 model the latent semantics of context. Finally,
                 experimental results on real-world data clearly
                 validate both the efficiency and effectiveness of the
                 proposed approaches for recognizing personalized
                 context of mobile users.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {10},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Liu:2014:ADI,
  author =       {Siyuan Liu and Lei Chen and Lionel M. Ni},
  title =        {Anomaly Detection from Incomplete Data},
  journal =      j-TKDD,
  volume =       {9},
  number =       {2},
  pages =        {11:1--11:??},
  month =        sep,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2629668},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Oct 7 18:49:26 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Anomaly detection (a.k.a., outlier or burst detection)
                 is a well-motivated problem and a major data mining and
                 knowledge discovery task. In this article, we study the
                 problem of population anomaly detection, one of the key
                 issues related to event monitoring and population
                 management within a city. Through studying detected
                 population anomalies, we can trace and analyze these
                 anomalies, which could help to model city traffic
                 design and event impact analysis and prediction.
                 Although a significant and interesting issue, it is
                 very hard to detect population anomalies and retrieve
                 anomaly trajectories, especially given that it is
                 difficult to get actual and sufficient population data.
                 To address the difficulties of a lack of real
                 population data, we take advantage of mobile phone
                 networks, which offer enormous spatial and temporal
                 communication data on persons. More importantly, we
                 claim that we can utilize these mobile phone data to
                 infer and approximate population data. Thus, we can
                 study the population anomaly detection problem by
                 taking advantages of unique features hidden in mobile
                 phone data. In this article, we present a system to
                 conduct Population Anomaly Detection (PAD). First, we
                 propose an effective clustering method,
                 correlation-based clustering, to cluster the incomplete
                 location information from mobile phone data (i.e., from
                 mobile call volume distribution to population density
                 distribution). Then, we design an adaptive
                 parameter-free detection method, R-scan, to capture the
                 distributed dynamic anomalies. Finally, we devise an
                 efficient algorithm, BT-miner, to retrieve anomaly
                 trajectories. The experimental results from real-life
                 mobile phone data confirm the effectiveness and
                 efficiency of the proposed algorithms. Finally, the
                 proposed methods are realized as a pilot system in a
                 city in China.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {11},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Gundecha:2014:UVR,
  author =       {Pritam Gundecha and Geoffrey Barbier and Jiliang Tang
                 and Huan Liu},
  title =        {User Vulnerability and Its Reduction on a Social
                 Networking Site},
  journal =      j-TKDD,
  volume =       {9},
  number =       {2},
  pages =        {12:1--12:??},
  month =        sep,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2630421},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Oct 7 18:49:26 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Privacy and security are major concerns for many users
                 of social media. When users share information (e.g.,
                 data and photos) with friends, they can make their
                 friends vulnerable to security and privacy breaches
                 with dire consequences. With the continuous expansion
                 of a user's social network, privacy settings alone are
                 often inadequate to protect a user's profile. In this
                 research, we aim to address some critical issues
                 related to privacy protection: (1) How can we measure
                 and assess individual users' vulnerability? (2) With
                 the diversity of one's social network friends, how can
                 one figure out an effective approach to maintaining
                 balance between vulnerability and social utility? In
                 this work, first we present a novel way to define
                 vulnerable friends from an individual user's
                 perspective. User vulnerability is dependent on whether
                 or not the user's friends' privacy settings protect the
                 friend and the individual's network of friends (which
                 includes the user). We show that it is feasible to
                 measure and assess user vulnerability and reduce one's
                 vulnerability without changing the structure of a
                 social networking site. The approach is to unfriend
                 one's most vulnerable friends. However, when such a
                 vulnerable friend is also socially important,
                 unfriending him or her would significantly reduce one's
                 own social status. We formulate this novel problem as
                 vulnerability minimization with social utility
                 constraints. We formally define the optimization
                 problem and provide an approximation algorithm with a
                 proven bound. Finally, we conduct a large-scale
                 evaluation of a new framework using a Facebook dataset.
                 We resort to experiments and observe how much
                 vulnerability an individual user can be decreased by
                 unfriending a vulnerable friend. We compare performance
                 of different unfriending strategies and discuss the
                 security risk of new friend requests. Additionally, by
                 employing different forms of social utility, we confirm
                 that the balance between user vulnerability and social
                 utility can be practically achieved.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {12},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Duan:2014:SRC,
  author =       {Lian Duan and W. Nick Street and Yanchi Liu and
                 Songhua Xu and Brook Wu},
  title =        {Selecting the Right Correlation Measure for Binary
                 Data},
  journal =      j-TKDD,
  volume =       {9},
  number =       {2},
  pages =        {13:1--13:??},
  month =        sep,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2637484},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Oct 7 18:49:26 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Finding the most interesting correlations among items
                 is essential for problems in many commercial, medical,
                 and scientific domains. Although there are numerous
                 measures available for evaluating correlations,
                 different correlation measures provide drastically
                 different results. Piatetsky-Shapiro provided three
                 mandatory properties for any reasonable correlation
                 measure, and Tan et al. proposed several properties to
                 categorize correlation measures; however, it is still
                 hard for users to choose the desirable correlation
                 measures according to their needs. In order to solve
                 this problem, we explore the effectiveness problem in
                 three ways. First, we propose two desirable properties
                 and two optional properties for correlation measure
                 selection and study the property satisfaction for
                 different correlation measures. Second, we study
                 different techniques to adjust correlation measures and
                 propose two new correlation measures: the Simplified $
                 \chi^2 $ with Continuity Correction and the Simplified
                 $ \chi^2 $ with Support. Third, we analyze the upper
                 and lower bounds of different measures and categorize
                 them by the bound differences. Combining these three
                 directions, we provide guidelines for users to choose
                 the proper measure according to their needs.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {13},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Huang:2014:PBA,
  author =       {Hao Huang and Hong Qin and Shinjae Yoo and Dantong
                 Yu},
  title =        {Physics-Based Anomaly Detection Defined on Manifold
                 Space},
  journal =      j-TKDD,
  volume =       {9},
  number =       {2},
  pages =        {14:1--14:??},
  month =        sep,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2641574},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Oct 7 18:49:26 MDT 2014},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Current popular anomaly detection algorithms are
                 capable of detecting global anomalies but often fail to
                 distinguish local anomalies from normal instances.
                 Inspired by contemporary physics theory (i.e., heat
                 diffusion and quantum mechanics), we propose two
                 unsupervised anomaly detection algorithms. Building on
                 the embedding manifold derived from heat diffusion, we
                 devise Local Anomaly Descriptor (LAD), which faithfully
                 reveals the intrinsic neighborhood density. It uses a
                 scale-dependent umbrella operator to bridge global and
                 local properties, which makes LAD more informative
                 within an adaptive scope of neighborhood. To offer more
                 stability of local density measurement on scaling
                 parameter tuning, we formulate Fermi Density Descriptor
                 (FDD), which measures the probability of a fermion
                 particle being at a specific location. By choosing the
                 stable energy distribution function, FDD steadily
                 distinguishes anomalies from normal instances with any
                 scaling parameter setting. To further enhance the
                 efficacy of our proposed algorithms, we explore the
                 utility of anisotropic Gaussian kernel (AGK), which
                 offers better manifold-aware affinity information. We
                 also quantify and examine the effect of different
                 Laplacian normalizations for anomaly detection.
                 Comprehensive experiments on both synthetic and
                 benchmark datasets verify that our proposed algorithms
                 outperform the existing anomaly detection algorithms.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {14},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
%%% Editorial for TKDD 9(3).  Its article number is 15e, so the pages
%%% prefix is 15e as well; plain article 15 is the entry Jha:2015:SES.
@Article{Gionis:2015:ISI,
author = "Aristides Gionis and Hang Li",
title = "Introduction to the Special Issue {ACM SIGKDD} 2013",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "15e:1--15e:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700993",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15e",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jha:2015:SES,
  author =       {Madhav Jha and C. Seshadhri and Ali Pinar},
  title =        {A Space-Efficient Streaming Algorithm for Estimating
                 Transitivity and Triangle Counts Using the Birthday
                 Paradox},
  journal =      j-TKDD,
  volume =       {9},
  number =       {3},
  pages =        {15:1--15:??},
  month =        feb,
  year =         {2015},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2700395},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Fri Mar 6 09:34:37 MST 2015},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {We design a space-efficient algorithm that
                 approximates the transitivity (global clustering
                 coefficient) and total triangle count with only a
                 single pass through a graph given as a stream of edges.
                 Our procedure is based on the classic probabilistic
                 result, the birthday paradox. When the transitivity is
                 constant and there are more edges than wedges (common
                 properties for social networks), we can prove that our
                 algorithm requires $ O(\sqrt n) $ space ($n$ is the
                 number of vertices) to provide accurate estimates. We
                 run a detailed set of experiments on a variety of real
                 graphs and demonstrate that the memory requirement of
                 the algorithm is a tiny fraction of the graph. For
                 example, even for a graph with 200 million edges, our
                 algorithm stores just 40,000 edges to give accurate
                 results. Being a single pass streaming algorithm, our
                 procedure also maintains a real-time estimate of the
                 transitivity/number of triangles of a graph by storing
                 a minuscule fraction of edges.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {15},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Tang:2015:FMT,
author = "Lu-An Tang and Xiao Yu and Quanquan Gu and Jiawei Han
and Guofei Jiang and Alice Leung and Thomas {La
Porta}",
title = "A Framework of Mining Trajectories from Untrustworthy
Data in Cyber-Physical System",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "16:1--16:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700394",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Mar 6 09:34:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "A cyber-physical system (CPS) integrates physical
(i.e., sensor) devices with cyber (i.e., informational)
components to form a context-sensitive system that
responds intelligently to dynamic changes in real-world
situations. The CPS has wide applications in scenarios
such as environment monitoring, battlefield
surveillance, and traffic control. One key research
problem of CPS is called mining lines in the sand. With
a large number of sensors (sand) deployed in a
designated area, the CPS is required to discover all
trajectories (lines) of passing intruders in real time.
There are two crucial challenges that need to be
addressed: (1) the collected sensor data are not
trustworthy, and (2) the intruders do not send out any
identification information. The system needs to
distinguish multiple intruders and track their
movements. This study proposes a method called LiSM
(Line-in-the-Sand Miner) to discover trajectories from
untrustworthy sensor data. LiSM constructs a watching
network from sensor data and computes the locations of
intruder appearances based on the link information of
the network. The system retrieves a cone model from the
historical trajectories to track multiple intruders.
Finally, the system validates the mining results and
updates sensors' reliability scores in a feedback
process. In addition, LoRM (Line-on-the-Road Miner) is
proposed for trajectory discovery on road
networks---mining lines on the roads. LoRM employs a
filtering-and-refinement framework to reduce the
distance computational overhead on road networks and
uses a shortest-path-measure to track intruders. The
proposed methods are evaluated with extensive
experiments on big datasets. The experimental results
show that the proposed methods achieve higher accuracy
and efficiency in trajectory mining tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2015:QDR,
  author =       {Zheng Wang and Jieping Ye},
  title =        {Querying Discriminative and Representative Samples for
                 Batch Mode Active Learning},
  journal =      j-TKDD,
  volume =       {9},
  number =       {3},
  pages =        {17:1--17:??},
  month =        feb,
  year =         {2015},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2700408},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Fri Mar 6 09:34:37 MST 2015},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {Empirical risk minimization (ERM) provides a
                 principled guideline for many machine learning and data
                 mining algorithms. Under the ERM principle, one
                 minimizes an upper bound of the true risk, which is
                 approximated by the summation of empirical risk and the
                 complexity of the candidate classifier class. To
                 guarantee a satisfactory learning performance, ERM
                 requires that the training data are i.i.d. sampled from
                 the unknown source distribution. However, this may not
                 be the case in active learning, where one selects the
                 most informative samples to label, and these data may
                 not follow the source distribution. In this article, we
                 generalize the ERM principle to the active learning
                 setting. We derive a novel form of upper bound for the
                 true risk in the active learning setting; by minimizing
                 this upper bound, we develop a practical batch mode
                 active learning method. The proposed formulation
                 involves a nonconvex integer programming optimization
                 problem. We solve it efficiently by an alternating
                 optimization method. Our method is shown to query the
                 most informative samples while preserving the source
                 distribution as much as possible, thus identifying the
                 most uncertain and representative queries. We further
                 extend our method to multiclass active learning by
                 introducing novel pseudolabels in the multiclass case
                 and developing an efficient algorithm. Experiments on
                 benchmark datasets and real-world applications
                 demonstrate the superior performance of our proposed
                 method compared to state-of-the-art methods.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {17},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Gopal:2015:HBI,
  author =       {Siddharth Gopal and Yiming Yang},
  title =        {Hierarchical {Bayesian} Inference and Recursive
                 Regularization for Large-Scale Classification},
  journal =      j-TKDD,
  volume =       {9},
  number =       {3},
  pages =        {18:1--18:??},
  month =        apr,
  year =         {2015},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2629585},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Apr 14 09:22:28 MDT 2015},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {In this article, we address open challenges in
                 large-scale classification, focusing on how to
                 effectively leverage the dependency structures
                 (hierarchical or graphical) among class labels, and how
                 to make the inference scalable in jointly optimizing
                 all model parameters. We propose two main approaches,
                 namely the hierarchical Bayesian inference framework
                 and the recursive regularization scheme. The key idea
                 in both approaches is to reinforce the similarity among
                 parameter across the nodes in a hierarchy or network
                 based on the proximity and connectivity of the nodes.
                 For scalability, we develop hierarchical variational
                 inference algorithms and fast dual coordinate descent
                 training procedures with parallelization. In our
                 experiments for classification problems with hundreds
                 of thousands of classes and millions of training
                 instances with terabytes of parameters, the proposed
                 methods show consistent and statistically significant
                 improvements over other competing approaches, and the
                 best results on multiple benchmark datasets for
                 large-scale classification.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {18},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Yin:2015:MLB,
  author =       {Hongzhi Yin and Bin Cui and Ling Chen and Zhiting Hu
                 and Chengqi Zhang},
  title =        {Modeling Location-Based User Rating Profiles for
                 Personalized Recommendation},
  journal =      j-TKDD,
  volume =       {9},
  number =       {3},
  pages =        {19:1--19:??},
  month =        apr,
  year =         {2015},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2663356},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Tue Apr 14 09:22:28 MDT 2015},
  bibsource =    {http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract =     {This article proposes LA-LDA, a location-aware
                 probabilistic generative model that exploits
                 location-based ratings to model user profiles and
                 produce recommendations. Most of the existing
                 recommendation models do not consider the spatial
                 information of users or items; however, LA-LDA supports
                 three classes of location-based ratings, namely spatial
                 user ratings for nonspatial items, nonspatial user
                 ratings for spatial items, and spatial user ratings for
                 spatial items. LA-LDA consists of two components,
                 ULA-LDA and ILA-LDA, which are designed to take into
                 account user and item location information,
                 respectively. The component ULA-LDA explicitly
                 incorporates and quantifies the influence from local
                 public preferences to produce recommendations by
                 considering user home locations, whereas the component
                 ILA-LDA recommends items that are closer in both taste
                 and travel distance to the querying users by capturing
                 item co-occurrence patterns, as well as item location
                 co-occurrence patterns. The two components of LA-LDA
                 can be applied either separately or collectively,
                 depending on the available types of location-based
                 ratings. To demonstrate the applicability and
                 flexibility of the LA-LDA model, we deploy it to both
                 top-$k$ recommendation and cold start recommendation
                 scenarios. Experimental evidence on large-scale
                 real-world data, including the data from Gowalla (a
                 location-based social network), DoubanEvent (an
                 event-based social network), and MovieLens (a movie
                 recommendation system), reveal that LA-LDA models user
                 profiles more accurately by outperforming existing
                 recommendation models for top-$k$ recommendation and
                 the cold start problem.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {19},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}
@Article{Hu:2015:PSD,
author = "Juhua Hu and De-Chuan Zhan and Xintao Wu and Yuan
Jiang and Zhi-Hua Zhou",
title = "Pairwised Specific Distance Learning from Physical
Linkages",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "20:1--20:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700405",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In real tasks, usually a good classification
performance can only be obtained when a good distance
metric is obtained; therefore, distance metric learning
has attracted significant attention in the past few
years. Typical studies of distance metric learning
evaluate how to construct an appropriate distance
metric that is able to separate training data points
from different classes or satisfy a set of constraints
(e.g., must-links and/or cannot-links). It is
noteworthy that this task becomes challenging when
there are only limited labeled training data points and
no constraints are given explicitly. Moreover, most
existing approaches aim to construct a global distance
metric that is applicable to all data points. However,
different data points may have different properties and
may require different distance metrics. We notice that
data points in real tasks are often connected by
physical links (e.g., people are linked with each other
in social networks; personal webpages are often
connected to other webpages, including nonpersonal
webpages), but the linkage information has not been
exploited in distance metric learning. In this article,
we develop a pairwised specific distance (PSD) approach
that exploits the structures of physical linkages and
in particular captures the key observations that
nonmetric and clique linkages imply the appearance of
different or unique semantics, respectively. It is
noteworthy that, rather than generating a global
distance, PSD generates different distances for
different pairs of data points; this property is
desired in applications involving complicated data
semantics. We mainly present PSD for multi-class
learning and further extend it to multi-label learning.
Experimental results validate the effectiveness of PSD,
especially in the scenarios in which there are very
limited labeled training data points and no explicit
constraints are given.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "20",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Soundarajan:2015:ULG,
author = "Sucheta Soundarajan and John E. Hopcroft",
title = "Use of Local Group Information to Identify Communities
in Networks",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "21:1--21:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700404",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The recent interest in networks has inspired a broad
range of work on algorithms and techniques to
characterize, identify, and extract communities from
networks. Such efforts are complicated by a lack of
consensus on what a ``community'' truly is, and these
disagreements have led to a wide variety of
mathematical formulations for describing communities.
Often, these mathematical formulations, such as
modularity and conductance, have been founded in the
general principle that communities, like a $ G(n, p) $
graph, are ``round,'' with connections throughout the
entire community, and so algorithms were developed to
optimize such mathematical measures. More recently, a
variety of algorithms have been developed that, rather
than expecting connectivity through the entire
community, seek out very small groups of well-connected
nodes and then connect these groups into larger
communities. In this article, we examine seven real
networks, each containing external annotation that
allows us to identify ``annotated communities.'' A
study of these annotated communities gives insight into
why the second category of community detection
algorithms may be more successful than the first
category. We then present a flexible algorithm template
that is based on the idea of joining together small
sets of nodes. In this template, we first identify very
small, tightly connected ``subcommunities'' of nodes,
each corresponding to a single node's ``perception'' of
the network around it. We then create a new network in
which each node represents such a subcommunity, and
then identify communities in this new network. Because
each node can appear in multiple subcommunities, this
method allows us to detect overlapping communities.
When evaluated on real data, we show that our template
outperforms many other state-of-the-art algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "21",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2015:UCN,
author = "Pinghui Wang and Junzhou Zhao and John C. S. Lui and
Don Towsley and Xiaohong Guan",
title = "Unbiased Characterization of Node Pairs over Large
Graphs",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "22:1--22:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700393",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Characterizing user pair relationships is important
for applications such as friend recommendation and
interest targeting in online social networks (OSNs).
Due to the large-scale nature of such networks, it is
infeasible to enumerate all user pairs and thus
sampling is used. In this article, we show that it is a
great challenge for OSN service providers to
characterize user pair relationships, even when they
possess the complete graph topology. The reason is that
when sampling techniques (i.e., uniform vertex sampling
(UVS) and random walk (RW)) are naively applied, they
can introduce large biases, particularly for estimating
similarity distribution of user pairs with constraints
like existence of mutual neighbors, which is important
for applications such as identifying network homophily.
Estimating statistics of user pairs is more challenging
in the absence of the complete topology information, as
an unbiased sampling technique like UVS is usually not
allowed and exploring the OSN graph topology is
expensive. To address these challenges, we present
unbiased sampling methods to characterize user pair
properties based on UVS and RW techniques. We carry out
an evaluation of our methods to show their accuracy and
efficiency. Finally, we apply our methods to three
OSNs---Foursquare, Douban, and Xiami---and discover that
significant homophily is present in these networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "22",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Vlachos:2015:DPC,
author = "Michail Vlachos and Johannes Schneider and Vassilios
G. Vassiliadis",
title = "On Data Publishing with Clustering Preservation",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "23:1--23:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700403",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The emergence of cloud-based storage services is
opening up new avenues in data exchange and data
dissemination. This has amplified the interest in
right-protection mechanisms to establish ownership in
the event of data leakage. Current right-protection
technologies, however, rarely provide strong guarantees
on dataset utility after the protection process. This
work presents techniques that explicitly address this
topic and provably preserve the outcome of certain
mining operations. In particular, we take special care
to guarantee that the outcome of hierarchical
clustering operations remains the same before and after
right protection. Our approach considers all prevalent
hierarchical clustering variants: single-, complete-,
and average-linkage. We imprint the ownership in a
dataset using watermarking principles, and we derive
tight bounds on the expansion/contraction of distances
incurred by the process. We leverage our analysis to
design fast algorithms for right protection without
exhaustively searching the vast design space. Finally,
because the right-protection process introduces a
user-tunable distortion on the dataset, we explore the
possibility of using this mechanism for data
obfuscation. We quantify the tradeoff between
obfuscation and utility for spatiotemporal datasets and
discover very favorable characteristics of the process.
An additional advantage is that when one is interested
in both right-protecting and obfuscating the original
data values, the proposed mechanism can accomplish both
tasks simultaneously.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "23",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{VazDeMelo:2015:UDP,
author = "Pedro O. S. {Vaz De Melo} and Christos Faloutsos and
Renato Assun{\c{c}}{\~a}o and Rodrigo Alves and Antonio
A. F. Loureiro",
title = "Universal and Distinct Properties of Communication
Dynamics: How to Generate Realistic Inter-event Times",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "24:1--24:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700399",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "With the advancement of information systems, means of
communications are becoming cheaper, faster, and more
available. Today, millions of people carrying
smartphones or tablets are able to communicate
practically any time and anywhere they want. They can
access their e-mails, comment on weblogs, watch and
post videos and photos (as well as comment on them),
and make phone calls or text messages almost
ubiquitously. Given this scenario, in this article, we
tackle a fundamental aspect of this new era of
communication: How the time intervals between
communication events behave for different technologies
and means of communications. Are there universal
patterns for the Inter-Event Time Distribution (IED)?
How do inter-event times behave differently among
particular technologies? To answer these questions, we
analyzed eight different datasets from real and modern
communication data and found four well-defined patterns
seen in all the eight datasets. Moreover, we propose
the use of the Self-Feeding Process (SFP) to generate
inter-event times between communications. The SFP is an
extremely parsimonious point process that requires at
most two parameters and is able to generate inter-event
times with all the universal properties we observed in
the data. We also show three potential applications of
the SFP: as a framework to generate a synthetic dataset
containing realistic communication events of any one of
the analyzed means of communications, as a technique to
detect anomalies, and as a building block for more
specific models that aim to encompass the
particularities seen in each of the analyzed systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "24",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2015:WIY,
author = "Jing Zhang and Jie Tang and Juanzi Li and Yang Liu and
Chunxiao Xing",
title = "Who Influenced You? {Predicting} Retweet via Social
Influence Locality",
journal = j-TKDD,
volume = "9",
number = "3",
pages = "25:1--25:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700398",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 14 09:22:28 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Social influence occurs when one's opinions, emotions,
or behaviors are affected by others in a social
network. However, social influence takes many forms,
and its underlying mechanism is still unclear. For
example, how is one's behavior influenced by a group of
friends who know each other and by the friends from
different ego friend circles? In this article, we study
the social influence problem in a large microblogging
network. Particularly, we consider users' (re)tweet
behaviors and focus on investigating how friends in
one's ego network influence retweet behaviors. We
propose a novel notion of social influence locality and
develop two instantiation functions based on pairwise
influence and structural diversity. The defined
influence locality functions have strong predictive
power. Without any additional features, we can obtain
an F1-score of 71.65\% for predicting users' retweet
behaviors by training a logistic regression classifier
based on the defined influence locality functions. We
incorporate social influence locality into a factor
graph model, which can further leverage the
network-based correlation. Our experiments on the large
microblogging network show that the model significantly
improves the precision of retweet prediction. Our
analysis also reveals several intriguing discoveries.
For example, if you have six friends retweeting a
microblog, the average likelihood that you will also
retweet it strongly depends on the structure among the
six friends: The likelihood will significantly drop
(only 1/6) when the six friends do not know each other,
compared with the case when the six friends know each
other.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "25",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xie:2015:MMA,
author = "Hong Xie and John C. S. Lui",
title = "Mathematical Modeling and Analysis of Product Rating
with Partial Information",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "26:1--26:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700386",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Many Web services like Amazon, Epinions, and
TripAdvisor provide historical product ratings so that
users can evaluate the quality of products. Product
ratings are important because they affect how well a
product will be adopted by the market. The challenge is
that we only have partial information on these ratings:
each user assigns ratings to only a small subset of
products. Under this partial information setting, we
explore a number of fundamental questions. What is the
minimum number of ratings a product needs so that one
can make a reliable evaluation of its quality? How may
users' misbehavior, such as cheating in product rating,
affect the evaluation result? To answer these
questions, we present a probabilistic model to capture
various important factors (e.g., rating aggregation
rules, rating behavior) that may influence the product
quality assessment under the partial information
setting. We derive the minimum number of ratings needed
to produce a reliable indicator on the quality of a
product. We extend our model to accommodate users'
misbehavior in product rating. We derive the maximum
fraction of misbehaving users that a rating aggregation
rule can tolerate and the minimum number of ratings
needed to compensate. We carry out experiments using
both synthetic and real-world data (from Amazon and
TripAdvisor). We not only validate our model but also
show that the ``average rating rule'' produces more
reliable and robust product quality assessments than
the ``majority rating rule'' and the ``median rating
rule'' in aggregating product ratings. Last, we perform
experiments on two movie rating datasets (from Flixster
and Netflix) to demonstrate how to apply our framework
to improve the applications of recommender systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "26",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Esuli:2015:OTQ,
author = "Andrea Esuli and Fabrizio Sebastiani",
title = "Optimizing Text Quantifiers for Multivariate Loss
Functions",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "27:1--27:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700406",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We address the problem of quantification, a supervised
learning task whose goal is, given a class, to estimate
the relative frequency (or prevalence) of the class in
a dataset of unlabeled items. Quantification has
several applications in data and text mining, such as
estimating the prevalence of positive reviews in a set
of reviews of a given product or estimating the
prevalence of a given support issue in a dataset of
transcripts of phone calls to tech support. So far,
quantification has been addressed by learning a
general-purpose classifier, counting the unlabeled
items that have been assigned the class, and tuning the
obtained counts according to some heuristics. In this
article, we depart from the tradition of using
general-purpose classifiers and use instead a
supervised learning model for structured prediction,
capable of generating classifiers directly optimized
for the (multivariate and nonlinear) function used for
evaluating quantification accuracy. The experiments
that we have run on 5,500 binary high-dimensional
datasets (averaging more than 14,000 documents each)
show that this method is more accurate, more stable,
and more efficient than existing state-of-the-art
quantification methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "27",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2015:IMS,
author = "Bing-Rong Lin and Daniel Kifer",
title = "Information Measures in Statistical Privacy and Data
Processing Applications",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "28:1--28:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700407",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In statistical privacy, utility refers to two
concepts: information preservation, how much
statistical information is retained by a sanitizing
algorithm, and usability, how (and with how much
difficulty) one extracts this information to build
statistical models, answer queries, and so forth. Some
scenarios incentivize a separation between information
preservation and usability, so that the data owner
first chooses a sanitizing algorithm to maximize a
measure of information preservation, and, afterward,
the data consumers process the sanitized output
according to their various individual needs [Ghosh et
al. 2009; Williams and McSherry 2010]. We analyze the
information-preserving properties of utility measures
with a combination of two new and three existing
utility axioms and study how violations of an axiom can
be fixed. We show that the average (over possible
outputs of the sanitizer) error of Bayesian decision
makers forms the unique class of utility measures that
satisfy all of the axioms. The axioms are agnostic to
Bayesian concepts such as subjective probabilities and
hence strengthen support for Bayesian views in privacy
research. In particular, this result connects
information preservation to aspects of usability---if the
information preservation of a sanitizing algorithm
should be measured as the average error of a Bayesian
decision maker, shouldn't Bayesian decision theory be a
good choice when it comes to using the sanitized
outputs for various purposes? We put this idea to the
test in the unattributed histogram problem where our
decision-theoretic postprocessing algorithm empirically
outperforms previously proposed approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "28",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2015:DAC,
author = "Hao Huang and Shinjae Yoo and Dantong Yu and Hong
Qin",
title = "Density-Aware Clustering Based on Aggregated Heat
Kernel and Its Transformation",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "29:1--29:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700385",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Current spectral clustering algorithms suffer from the
sensitivity to existing noise and parameter scaling and
may not be aware of different density distributions
across clusters. If these problems are left untreated,
the consequent clustering results cannot accurately
represent true data patterns, in particular, for
complex real-world datasets with heterogeneous
densities. This article aims to solve these problems by
proposing a diffusion-based Aggregated Heat Kernel
(AHK) to improve the clustering stability, and a Local
Density Affinity Transformation (LDAT) to correct the
bias originating from different cluster densities. AHK
statistically models the heat diffusion traces along
the entire time scale, so it ensures robustness during
the clustering process, while LDAT probabilistically
reveals the local density of each instance and
suppresses the local density bias in the affinity
matrix. Our proposed framework integrates these two
techniques systematically. As a result, it not only
provides an advanced noise-resisting and density-aware
spectral mapping to the original dataset but also
demonstrates the stability during the processing of
tuning the scaling parameter (which usually controls
the range of neighborhood). Furthermore, our framework
works well with the majority of similarity kernels,
which ensures its applicability to many types of data
and problem domains. The systematic experiments on
different applications show that our proposed algorithm
outperforms state-of-the-art clustering algorithms for
the data with heterogeneous density distributions and
achieves robust clustering performance with respect to
tuning the scaling parameter and handling various
levels and types of noise.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "29",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2015:CSF,
author = "Kui Yu and Wei Ding and Dan A. Simovici and Hao Wang
and Jian Pei and Xindong Wu",
title = "Classification with Streaming Features: {An}
Emerging-Pattern Mining Approach",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "30:1--30:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700409",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Many datasets from real-world applications have very
high-dimensional or increasing feature space. It is a
new research problem to learn and maintain a classifier
to deal with very high dimensionality or streaming
features. In this article, we adapt the well-known
emerging-pattern--based classification models and
propose a semi-streaming approach. For streaming
features, it is computationally expensive or even
prohibitive to mine long-emerging patterns, and it is
nontrivial to integrate emerging-pattern mining with
feature selection. We present an online feature
selection step, which is capable of selecting and
maintaining a pool of effective features from a feature
stream. Then, in our offline step, separated from the
online step, we periodically compute and update
emerging patterns from the pool of selected features
from the online step. We evaluate the effectiveness and
efficiency of the proposed method using a series of
benchmark datasets and a real-world case study on Mars
crater detection. Our proposed method yields
classification performance comparable to the
state-of-art static classification methods. Most
important, the proposed method is significantly faster
and can efficiently handle datasets with streaming
features.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "30",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2015:SEH,
author = "Guimei Liu and Haojun Zhang and Mengling Feng and
Limsoon Wong and See-Kiong Ng",
title = "Supporting Exploratory Hypothesis Testing and
Analysis",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "31:1--31:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2701430",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Conventional hypothesis testing is carried out in a
hypothesis-driven manner. A scientist must first
formulate a hypothesis based on what he or she sees and
then devise a variety of experiments to test it. Given
the rapid growth of data, it has become virtually
impossible for a person to manually inspect all data to
find all of the interesting hypotheses for testing. In
this article, we propose and develop a data-driven
framework for automatic hypothesis testing and
analysis. We define a hypothesis as a comparison
between two or more subpopulations. We find
subpopulations for comparison using frequent pattern
mining techniques and then pair them up for statistical
hypothesis testing. We also generate additional
information for further analysis of the hypotheses that
are deemed significant. The number of hypotheses
generated can be very large, and many of them are very
similar. We develop algorithms to remove redundant
hypotheses and present a succinct set of significant
hypotheses to users. We conducted a set of experiments
to show the efficiency and effectiveness of the
proposed algorithms. The results show that our system
can help users (1) identify significant hypotheses
efficiently, (2) isolate the reasons behind significant
hypotheses efficiently, and (3) find confounding
factors that form Simpson's paradoxes with discovered
significant hypotheses.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "31",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Greco:2015:PDU,
author = "Gianluigi Greco and Antonella Guzzo and Francesco
Lupia and Luigi Pontieri",
title = "Process Discovery under Precedence Constraints",
journal = j-TKDD,
volume = "9",
number = "4",
pages = "32:1--32:??",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2710020",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jun 3 06:21:22 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Process discovery has emerged as a powerful approach
to support the analysis and the design of complex
processes. It consists of analyzing a set of traces
registering the sequence of tasks performed along
several enactments of a transactional system, in order
to build a process model that can explain all the
episodes recorded over them. An approach to accomplish
this task is presented that can benefit from the
background knowledge that, in many cases, is available
to the analysts taking care of the process (re-)design.
The approach is based on encoding the information
gathered from the log and the (possibly) given
background knowledge in terms of precedence
constraints, that is, of constraints over the topology
of the resulting process models. Mining algorithms are
eventually formulated in terms of reasoning problems
over precedence constraints, and the computational
complexity of such problems is thoroughly analyzed by
tracing their tractability frontier. Solution
algorithms are proposed and their properties analyzed.
These algorithms have been implemented in a prototype
system, and results of a thorough experimental activity
are discussed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "32",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Mirbakhsh:2015:ITR,
author = {Nima Mirbakhsh and Charles X. Ling},
title = {Improving Top-{$N$} Recommendation for Cold-Start
Users via Cross-Domain Information},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {9},
number = {4},
articleno = {33},
pages = {33:1--33:??},
month = jun,
year = {2015},
doi = {https://doi.org/10.1145/2724720},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {Making accurate recommendations for cold-start users
is a challenging yet important problem in
recommendation systems. Including more information from
other domains is a natural solution to improve the
recommendations. However, most previous work in
cross-domain recommendations has focused on improving
prediction accuracy with several severe limitations. In
this article, we extend our previous work on
clustering-based matrix factorization in single domains
into cross domains. In addition, we utilize recent
results on unobserved ratings. Our new method can more
effectively utilize data from auxiliary domains to
achieve better recommendations, especially for
cold-start users. For example, our method improves the
recall to 21\% on average for cold-start users, whereas
previous methods result in only 15\% recall in the
cross-domain Amazon dataset. We also observe almost the
same improvements in the Epinions dataset. Considering
that it is often difficult to make even a small
improvement in recommendations, for cold-start users in
particular, our result is quite significant.},
acknowledgement = ack-nhfb,
bibdate = {Wed Jun 3 06:21:22 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@article{Bonchi:2015:CCC,
author = {Francesco Bonchi and Aristides Gionis and Francesco
Gullo and Charalampos E. Tsourakakis and Antti
Ukkonen},
title = {Chromatic Correlation Clustering},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {9},
number = {4},
articleno = {34},
pages = {34:1--34:??},
month = jun,
year = {2015},
doi = {https://doi.org/10.1145/2728170},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {We study a novel clustering problem in which the
pairwise relations between objects are categorical.
This problem can be viewed as clustering the vertices
of a graph whose edges are of different types (colors).
We introduce an objective function that ensures the
edges within each cluster have, as much as possible,
the same color. We show that the problem is NP-hard and
propose a randomized algorithm with approximation
guarantee proportional to the maximum degree of the
input graph. The algorithm iteratively picks a random
edge as a pivot, builds a cluster around it, and
removes the cluster from the graph. Although being
fast, easy to implement, and parameter-free, this
algorithm tends to produce a relatively large number of
clusters. To overcome this issue we introduce a variant
algorithm, which modifies how the pivot is chosen and
how the cluster is built around the pivot. Finally, to
address the case where a fixed number of output
clusters is required, we devise a third algorithm that
directly optimizes the objective function based on the
alternating-minimization paradigm. We also extend our
objective function to handle cases where object's
relations are described by multiple labels. We modify
our randomized approximation algorithm to optimize such
an extended objective function and show that its
approximation guarantee remains proportional to the
maximum degree of the graph. We test our algorithms on
synthetic and real data from the domains of social
media, protein-interaction networks, and bibliometrics.
Results reveal that our algorithms outperform a
baseline algorithm both in the task of reconstructing a
ground-truth clustering and in terms of
objective-function value.},
acknowledgement = ack-nhfb,
bibdate = {Wed Jun 3 06:21:22 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@article{Wang:2015:LSC,
author = {Hua Wang and Feiping Nie and Heng Huang},
title = {Large-Scale Cross-Language {Web} Page Classification
via Dual Knowledge Transfer Using Fast Nonnegative
Matrix Trifactorization},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {1},
pages = {1:1--1:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2710021},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {With the rapid growth of modern technologies, Internet
has reached almost every corner of the world. As a
result, it becomes more and more important to manage
and mine information contained in Web pages in
different languages. Traditional supervised learning
methods usually require a large amount of training data
to obtain accurate and robust classification models.
However, labeled Web pages did not increase as fast as
the growth of Internet. The lack of sufficient training
Web pages in many languages, especially for those in
uncommonly used languages, makes it a challenge for
traditional classification algorithms to achieve
satisfactory performance. To address this, we observe
that Web pages for a same topic from different
languages usually share some common semantic patterns,
though in different representation forms. In addition,
we also observe that the associations between word
clusters and Web page classes are another type of
reliable carriers to transfer knowledge across
languages. With these recognitions, in this article we
propose a novel joint nonnegative matrix
trifactorization (NMTF) based Dual Knowledge Transfer
(DKT) approach for cross-language Web page
classification. Our approach transfers knowledge from
the auxiliary language, in which abundant labeled Web
pages are available, to the target languages, in which
we want to classify Web pages, through two different
paths: word cluster approximation and the associations
between word clusters and Web page classes. With the
reinforcement between these two different knowledge
transfer paths, our approach can achieve better
classification accuracy. In order to deal with the
large-scale real world data, we further develop the
proposed DKT approach by constraining the factor
matrices of NMTF to be cluster indicator matrices. Due
to the nature of cluster indicator matrices, we can
decouple the proposed optimization objective and the
resulted subproblems are of much smaller sizes
involving much less matrix multiplications, which make
our new approach much more computationally efficient.
We evaluate the proposed approach in extensive
experiments using a real world cross-language Web page
data set. Promising results have demonstrated the
effectiveness of our approach that are consistent with
our theoretical analyses.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Zhou:2015:SIB,
author = "Yang Zhou and Ling Liu",
title = "Social Influence Based Clustering and Optimization
over Heterogeneous Information Networks",
journal = j-TKDD,
volume = "10",
number = "1",
pages = "2:1--2:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2717314",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jul 28 17:19:31 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Social influence analysis has shown great potential
for strategic marketing decision. It is well known that
people influence one another based on both their social
connections and the social activities that they have
engaged in the past. In this article, we develop an
innovative and high-performance social influence based
graph clustering framework with four unique features.
First, we explicitly distinguish social connection
based influence (self-influence) and social activity
based influence (co-influence). We compute the
self-influence similarity between two members based on
their social connections within a single collaboration
network, and compute the co-influence similarity by
taking into account not only the set of activities that
people participate but also the semantic association
between these activities. Second, we define the concept
of influence-based similarity by introducing a unified
influence-based similarity matrix that employs an
iterative weight update method to integrate
self-influence and co-influence similarities. Third, we
design a dynamic learning algorithm, called
SI-Cluster, for social influence based graph clustering. It
iteratively partitions a large social collaboration
network into K clusters based on both the social
network itself and the multiple associated activity
information networks, each representing a category of
activities that people have engaged. To make the
SI-Cluster algorithm converge fast, we transform
sophisticated nonlinear fractional programming problem
with respect to multiple weights into a straightforward
nonlinear parametric programming problem of single
variable. Finally, we develop an optimization technique
of diagonalizable-matrix approximation to speed up the
computation of self-influence similarity and
co-influence similarities. Our SI-Cluster-Opt
significantly improves the efficiency of SI-Cluster on
large graphs while maintaining high quality of
clustering results. Extensive experimental evaluation
on three real-world graphs shows that, compared to
existing representative graph clustering algorithms,
our SI-Cluster-Opt approach not only achieves a very
good balance between self-influence and co-influence
similarities but also scales extremely well for
clustering large graphs in terms of time complexity
while meeting the guarantee of high density, low
entropy and low Davies--Bouldin Index.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Papalexakis:2015:PSP,
author = {Evangelos E. Papalexakis and Christos Faloutsos and
Nicholas D. Sidiropoulos},
title = {{ParCube}: Sparse Parallelizable {CANDECOMP--PARAFAC}
Tensor Decomposition},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {3},
pages = {3:1--3:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2729980},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {How can we efficiently decompose a tensor into sparse
factors, when the data do not fit in memory? Tensor
decompositions have gained a steadily increasing
popularity in data-mining applications; however, the
current state-of-art decomposition algorithms operate
on main memory and do not scale to truly large
datasets. In this work, we propose ParCube, a new and
highly parallelizable method for speeding up tensor
decompositions that is well suited to produce sparse
approximations. Experiments with even moderately large
data indicate over 90\% sparser outputs and 14 times
faster execution, with approximation error close to the
current state of the art irrespective of computation
and memory requirements. We provide theoretical
guarantees for the algorithm's correctness and we
experimentally validate our claims through extensive
experiments, including four different real world
datasets (Enron, Lbnl, Facebook and Nell),
demonstrating its effectiveness for data-mining
practitioners. In particular, we are the first to
analyze the very large Nell dataset using a sparse
tensor decomposition, demonstrating that ParCube
enables us to handle effectively and efficiently very
large datasets. Finally, we make our highly scalable
parallel implementation publicly available, enabling
reproducibility of our work.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@article{Ahmed:2015:AMC,
author = {Rezwan Ahmed and George Karypis},
title = {Algorithms for Mining the Coevolving Relational Motifs
in Dynamic Networks},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {4},
pages = {4:1--4:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2733380},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {Computational methods and tools that can efficiently
and effectively analyze the temporal changes in dynamic
complex relational networks enable us to gain
significant insights regarding the entity relations and
their evolution. This article introduces a new class of
dynamic graph patterns, referred to as coevolving
relational motifs (CRMs), which are designed to
identify recurring sets of entities whose relations
change in a consistent way over time. CRMs can provide
evidence to the existence of, possibly unknown,
coordination mechanisms by identifying the relational
motifs that evolve in a similar and highly conserved
fashion. We developed an algorithm to efficiently
analyze the frequent relational changes between the
entities of the dynamic networks and capture all
frequent coevolutions as CRMs. Our algorithm follows a
depth-first exploration of the frequent CRM lattice and
incorporates canonical labeling for redundancy
elimination. Experimental results based on multiple
real world dynamic networks show that the method is
able to efficiently identify CRMs. In addition, a
qualitative analysis of the results shows that the
discovered patterns can be used as features to
characterize the dynamic network.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@article{Campello:2015:HDE,
author = {Ricardo J. G. B. Campello and Davoud Moulavi and
Arthur Zimek and J{\"o}rg Sander},
title = {Hierarchical Density Estimates for Data Clustering,
Visualization, and Outlier Detection},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {5},
pages = {5:1--5:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2733381},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {An integrated framework for density-based cluster
analysis, outlier detection, and data visualization is
introduced in this article. The main module consists of
an algorithm to compute hierarchical estimates of the
level sets of a density, following Hartigan's classic
model of density-contour clusters and trees. Such an
algorithm generalizes and improves existing
density-based clustering techniques with respect to
different aspects. It provides as a result a complete
clustering hierarchy composed of all possible
density-based clusters following the nonparametric
model adopted, for an infinite range of density
thresholds. The resulting hierarchy can be easily
processed so as to provide multiple ways for data
visualization and exploration. It can also be further
postprocessed so that: (i) a normalized score of
``outlierness'' can be assigned to each data object,
which unifies both the global and local perspectives of
outliers into a single definition; and (ii) a ``flat''
(i.e., nonhierarchical) clustering solution composed of
clusters extracted from local cuts through the cluster
tree (possibly corresponding to different density
thresholds) can be obtained, either in an unsupervised
or in a semisupervised way. In the unsupervised
scenario, the algorithm corresponding to this
postprocessing module provides a global, optimal
solution to the formal problem of maximizing the
overall stability of the extracted clusters. If
partially labeled objects or instance-level constraints
are provided by the user, the algorithm can solve the
problem by considering both constraints
violations/satisfactions and cluster stability
criteria. An asymptotic complexity analysis, both in
terms of running time and memory space, is described.
Experiments are reported that involve a variety of
synthetic and real datasets, including comparisons with
state-of-the-art, density-based clustering and (global
and local) outlier detection methods.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@article{Berardi:2015:UTR,
author = {Giacomo Berardi and Andrea Esuli and Fabrizio
Sebastiani},
title = {Utility-Theoretic Ranking for Semiautomated Text
Classification},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {6},
pages = {6:1--6:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2742548},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {Semiautomated Text Classification (SATC) may be
defined as the task of ranking a set D of automatically
labelled textual documents in such a way that, if a
human annotator validates (i.e., inspects and corrects
where appropriate) the documents in a top-ranked
portion of D with the goal of increasing the overall
labelling accuracy of D, the expected increase is
maximized. An obvious SATC strategy is to rank D so
that the documents that the classifier has labelled
with the lowest confidence are top ranked. In this
work, we show that this strategy is suboptimal. We
develop new utility-theoretic ranking methods based on
the notion of validation gain, defined as the
improvement in classification effectiveness that would
derive by validating a given automatically labelled
document. We also propose a new effectiveness measure
for SATC-oriented ranking methods, based on the
expected reduction in classification error brought
about by partially validating a list generated by a
given ranking method. We report the results of
experiments showing that, with respect to the baseline
method mentioned earlier, and according to the proposed
measure, our utility-theoretic ranking methods can
achieve substantially higher expected reductions in
classification error.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Yu:2015:DIP,
author = "Zhiwen Yu and Zhu Wang and Huilei He and Jilei Tian
and Xinjiang Lu and Bin Guo",
title = "Discovering Information Propagation Patterns in
Microblogging Services",
journal = j-TKDD,
volume = "10",
number = "1",
pages = "7:1--7:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2742801",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jul 28 17:19:31 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "During the last decade, microblog has become an
important social networking service with billions of
users all over the world, acting as a novel and
efficient platform for the creation and dissemination
of real-time information. Modeling and revealing the
information propagation patterns in microblogging
services can not only lead to more accurate
understanding of user behaviors and provide insights
into the underlying sociology, but also enable useful
applications such as trending prediction,
recommendation and filtering, spam detection and viral
marketing. In this article, we aim to reveal the
information propagation patterns in Sina Weibo, the
biggest microblogging service in China. First, the
cascade of each message is represented as a tree based
on its retweeting process. Afterwards, we divide the
information propagation pattern into two levels, that
is, the macro level and the micro level. On one hand,
the macro propagation patterns refer to general
propagation modes that are extracted by grouping
propagation trees based on hierarchical clustering. On
the other hand, the micro propagation patterns are
frequent information flow patterns that are discovered
using tree-based mining techniques. Experimental
results show that several interesting patterns are
extracted, such as popular message propagation,
artificial propagation, and typical information flows
between different types of users.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Zhang:2015:SMB,
author = {Xianchao Zhang and Xiaotong Zhang and Han Liu},
title = {Smart Multitask {Bregman} Clustering and Multitask
Kernel Clustering},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {8},
pages = {8:1--8:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2747879},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {Traditional clustering algorithms deal with a single
clustering task on a single dataset. However, there are
many related tasks in the real world, which motivates
multitask clustering. Recently some multitask
clustering algorithms have been proposed, and among
them multitask Bregman clustering (MBC) is a very
applicable method. MBC alternatively updates clusters
and learns relationships between clusters of different
tasks, and the two phases boost each other. However,
the boosting does not always have positive effects on
improving the clustering performance, it may also cause
negative effects. Another issue of MBC is that it
cannot deal with nonlinear separable data. In this
article, we show that in MBC, the process of using
cluster relationship to boost the cluster updating
phase may cause negative effects, that is, cluster
centroids may be skewed under some conditions. We
propose a smart multitask Bregman clustering (S-MBC)
algorithm which can identify the negative effects of
the boosting and avoid the negative effects if they
occur. We then propose a multitask kernel clustering
(MKC) framework for nonlinear separable data by using a
similar framework like MBC in the kernel space. We also
propose a specific optimization method, which is quite
different from that of MBC, to implement the MKC
framework. Since MKC can also cause negative effects
like MBC, we further extend the framework of MKC to a
smart multitask kernel clustering (S-MKC) framework in
a similar way that S-MBC is extended from MBC. We
conduct experiments on 10 real world multitask
clustering datasets to evaluate the performance of
S-MBC and S-MKC. The results on clustering accuracy
show that: (1) compared with the original MBC algorithm
MBC, S-MBC and S-MKC perform much better; (2) compared
with the convex discriminative multitask relationship
clustering (DMTRC) algorithms DMTRC-L and DMTRC-R which
also avoid negative transfer, S-MBC and S-MKC perform
worse in the (ideal) case in which different tasks have
the same cluster number and the empirical label
marginal distribution in each task distributes evenly,
but better or comparable in other (more general) cases.
Moreover, S-MBC and S-MKC can work on the datasets in
which different tasks have different number of
clusters, violating the assumptions of DMTRC-L and
DMTRC-R. The results on efficiency show that S-MBC and
S-MKC consume more computational time than MBC and less
computational time than DMTRC-L and DMTRC-R. Overall
S-MBC and S-MKC are competitive compared with the
state-of-the-art multitask clustering algorithms in
synthetical terms of accuracy, efficiency and
applicability.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Wei:2015:MTP,
author = "Wei Wei and Kathleen M. Carley",
title = "Measuring Temporal Patterns in Dynamic Social
Networks",
journal = j-TKDD,
volume = "10",
number = "1",
pages = "9:1--9:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2749465",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jul 28 17:19:31 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given social networks over time, how can we measure
network activities across different timesteps with a
limited number of metrics? We propose two classes of
dynamic metrics for assessing temporal evolution
patterns of agents in terms of persistency and
emergence. For each class of dynamic metrics, we
implement it using three different temporal aggregation
models ranging from the most commonly used Average
Aggregation Model to the more complex models such as
the Exponential Aggregation Model. We argue that the
problem of measuring temporal patterns can be
formulated using Recency and Primacy effect, which is a
concept used to characterize human cognitive processes.
Experimental results show that the way metrics model
Recency--Primacy effect is closely related to their
abilities to measure temporal patterns. Furthermore,
our results indicate that future network agent
activities can be predicted based on history
information using dynamic metrics. By conducting
multiple experiments, we are also able to find an
optimal length of history information that is most
relevant to future activities. This optimal length is
highly consistent within a dataset and can be used as
an intrinsic metric to evaluate a dynamic social
network.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Liu:2015:RAT,
author = {Siyuan Liu and Qiang Qu and Shuhui Wang},
title = {Rationality Analytics from Trajectories},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {1},
articleno = {10},
pages = {10:1--10:??},
month = jul,
year = {2015},
doi = {https://doi.org/10.1145/2735634},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {The availability of trajectories tracking the
geographical locations of people as a function of time
offers an opportunity to study human behaviors. In this
article, we study rationality from the perspective of
user decision on visiting a point of interest (POI)
which is represented as a trajectory. However, the
analysis of rationality is challenged by a number of
issues, for example, how to model a trajectory in terms
of complex user decision processes? and how to detect
hidden factors that have significant impact on the
rational decision making? In this study, we propose
Rationality Analysis Model (RAM) to analyze rationality
from trajectories in terms of a set of impact factors.
In order to automatically identify hidden factors, we
propose a method, Collective Hidden Factor Retrieval
(CHFR), which can also be generalized to parse multiple
trajectories at the same time or parse individual
trajectories of different time periods. Extensive
experimental study is conducted on three large-scale
real-life datasets (i.e., taxi trajectories, user
shopping trajectories, and visiting trajectories in a
theme park). The results show that the proposed methods
are efficient, effective, and scalable. We also deploy
a system in a large theme park to conduct a field
study. Interesting findings and user feedback of the
field study are provided to support other applications
in user behavior mining and analysis, such as business
intelligence and user management for marketing
purposes.},
acknowledgement = ack-nhfb,
bibdate = {Tue Jul 28 17:19:31 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@article{Jia:2015:SGR,
author = {Adele Lu Jia and Siqi Shen and Ruud {Van De Bovenkamp}
and Alexandru Iosup and Fernando Kuipers and Dick H. J.
Epema},
title = {Socializing by Gaming: Revealing Social Relationships
in Multiplayer Online Games},
journal = j-TKDD,
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
ajournal = {ACM Trans. Knowl. Discov. Data},
journal-url = {https://dl.acm.org/loi/tkdd},
volume = {10},
number = {2},
articleno = {11},
pages = {11:1--11:??},
month = oct,
year = {2015},
doi = {https://doi.org/10.1145/2736698},
issn = {1556-4681 (print), 1556-472X (electronic)},
issn-l = {1556-4681},
coden = {????},
abstract = {Multiplayer Online Games (MOGs) like Defense of the
Ancients and StarCraft II have attracted hundreds of
millions of users who communicate, interact, and
socialize with each other through gaming. In MOGs, rich
social relationships emerge and can be used to improve
gaming services such as match recommendation and game
population retention, which are important for the user
experience and the commercial value of the companies
who run these MOGs. In this work, we focus on
understanding social relationships in MOGs. We propose
a graph model that is able to capture social
relationships of a variety of types and strengths. We
apply our model to real-world data collected from three
MOGs that contain in total over ten years of behavioral
history for millions of players and matches. We compare
social relationships in MOGs across different game
genres and with regular online social networks like
Facebook. Taking match recommendation as an example
application of our model, we propose SAMRA, a Socially
Aware Match Recommendation Algorithm that takes social
relationships into account. We show that our model not
only improves the precision of traditional link
prediction approaches, but also potentially helps
players enjoy games to a higher extent.},
acknowledgement = ack-nhfb,
bibdate = {Mon Oct 26 17:19:18 MDT 2015},
bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Papagelis:2015:RSG,
  author          = {Manos Papagelis},
  title           = {Refining Social Graph Connectivity via Shortcut Edge
                     Addition},
  journal         = j-TKDD,
  volume          = {10},
  number          = {2},
  pages           = {12:1--12:??},
  month           = oct,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2757281},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Small changes on the structure of a graph can have a
                     dramatic effect on its connectivity. While in the
                     traditional graph theory, the focus is on well-defined
                     properties of graph connectivity, such as
                     biconnectivity, in the context of a social graph,
                     connectivity is typically manifested by its ability to
                     carry on social processes. In this paper, we consider
                     the problem of adding a small set of nonexisting edges
                     (shortcuts) in a social graph with the main objective
                     of minimizing its characteristic path length. This
                     property determines the average distance between pairs
                     of vertices and essentially controls how broadly
                     information can propagate through a network. We
                     formally define the problem of interest, characterize
                     its hardness and propose a novel method, path
                     screening, which quickly identifies important shortcuts
                     to guide the augmentation of the graph. We devise a
                     sampling-based variant of our method that can scale up
                     the computation in larger graphs. The claims of our
                     methods are formally validated. Through experiments on
                     real and synthetic data, we demonstrate that our
                     methods are a multitude of times faster than standard
                     approaches, their accuracy outperforms sensible
                     baselines and they can ease the spread of information
                     in a network, for a varying range of conditions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {12},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Hong:2015:CAR,
author = "Liang Hong and Lei Zou and Cheng Zeng and Luming Zhang
and Jian Wang and Jilei Tian",
title = "Context-Aware Recommendation Using Role-Based Trust
Network",
journal = j-TKDD,
volume = "10",
number = "2",
pages = "13:1--13:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2751562",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Oct 26 17:19:18 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Recommender systems have been studied comprehensively
in both academic and industrial fields over the past
decade. As user interests can be affected by context at
any time and any place in mobile scenarios, rich
context information becomes more and more important for
personalized context-aware recommendations. Although
existing context-aware recommender systems can make
context-aware recommendations to some extent, they
suffer several inherent weaknesses: (1) Users'
context-aware interests are not modeled realistically,
which reduces the recommendation quality; (2) Current
context-aware recommender systems ignore trust
relations among users. Trust relations are actually
context-aware and associated with certain aspects
(i.e., categories of items) in mobile scenarios. In
this article, we define a term role to model common
context-aware interests among a group of users. We
propose an efficient role mining algorithm to mine
roles from a ``user-context-behavior'' matrix, and a
role-based trust model to calculate context-aware trust
value between two users. During online recommendation,
given a user $u$ in a context $c$, an efficient weighted
set similarity query (WSSQ) algorithm is designed to
build $u$'s role-based trust network in context $c$.
Finally, we make recommendations to $u$ based on $u$'s
role-based trust network by considering both
context-aware roles and trust relations. Extensive
experiments demonstrate that our recommendation
approach outperforms the state-of-the-art methods in
both effectiveness and efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2015:OBF,
author = "Lei Zhang and Ping Luo and Linpeng Tang and Enhong
Chen and Qi Liu and Min Wang and Hui Xiong",
title = "Occupancy-Based Frequent Pattern Mining",
journal = j-TKDD,
volume = "10",
number = "2",
pages = "14:1--14:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2753765",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Oct 26 17:19:18 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Frequent pattern mining is an important data mining
problem with many broad applications. Most studies in
this field use support (frequency) to measure the
popularity of a pattern, namely the fraction of
transactions or sequences that include the pattern in a
data set. In this study, we introduce a new interesting
measure, namely occupancy, to measure the completeness
of a pattern in its supporting transactions or
sequences. This is motivated by some real-world pattern
recommendation applications in which an interesting
pattern should not only be frequent, but also occupies
a large portion of its supporting transactions or
sequences. With the definition of occupancy we call a
pattern dominant if its occupancy value is above a
user-specified threshold. Then, our task is to identify
the qualified patterns which are both dominant and
frequent. Also, we formulate the problem of mining
top-$k$ qualified patterns, that is, finding $k$ qualified
patterns with maximum values on a user-defined function
of support and occupancy, for example, weighted sum of
support and occupancy. The challenge to these tasks is
that the value of occupancy does not change
monotonically when more items are appended to a given
pattern. Therefore, we propose a general algorithm
called DOFRA (DOminant and FRequent pattern mining
Algorithm) for mining these qualified patterns, which
explores the upper bound properties on occupancy to
drastically reduce the search process. Finally, we show
the effectiveness of DOFRA in two real-world
applications and also demonstrate the efficiency of
DOFRA on several real and large synthetic datasets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2015:AAS,
  author          = {Hung-Hsuan Chen and C. Lee Giles},
  title           = {{ASCOS++}: an Asymmetric Similarity Measure for
                     Weighted Networks to Address the Problem of {SimRank}},
  journal         = j-TKDD,
  volume          = {10},
  number          = {2},
  pages           = {15:1--15:??},
  month           = oct,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2776894},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In this article, we explore the relationships among
                     digital objects in terms of their similarity based on
                     vertex similarity measures. We argue that SimRank --- a
                     famous similarity measure --- and its families, such as
                     P-Rank and SimRank++, fail to capture similar node
                     pairs in certain conditions, especially when two nodes
                     can only reach each other through paths of odd lengths.
                     We present new similarity measures ASCOS and ASCOS++ to
                     address the problem. ASCOS outputs a more complete
                     similarity score than SimRank and SimRank's families.
                     ASCOS++ enriches ASCOS to include edge weight into the
                     measure, giving all edges and network weights an
                     opportunity to make their contribution. We show that
                     both ASCOS++ and ASCOS can be reformulated and applied
                     on a distributed environment for parallel contribution.
                     Experimental results show that ASCOS++ reports a better
                     score than SimRank and several famous similarity
                     measures. Finally, we re-examine previous use cases of
                     SimRank, and explain appropriate and inappropriate use
                     cases. We suggest future SimRank users following the
                     rules proposed here before na{\"\i}vely applying it. We
                     also discuss the relationship between ASCOS++ and
                     PageRank.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {15},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zafarani:2015:UIA,
  author          = {Reza Zafarani and Lei Tang and Huan Liu},
  title           = {User Identification Across Social Media},
  journal         = j-TKDD,
  volume          = {10},
  number          = {2},
  pages           = {16:1--16:??},
  month           = oct,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2747880},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {People use various social media sites for different
                     purposes. The information on each site is often
                     partial. When sources of complementary information are
                     integrated, a better profile of a user can be built.
                     This profile can help improve online services such as
                     advertising across sites. To integrate these sources of
                     information, it is necessary to identify individuals
                     across social media sites. This paper aims to address
                     the cross-media user identification problem. We provide
                     evidence on the existence of a mapping among identities
                     of individuals across social media sites, study the
                     feasibility of finding this mapping, and illustrate and
                     develop means for finding this mapping. Our studies
                     show that effective approaches that exploit information
                     redundancies due to users' unique behavioral patterns
                     can be utilized to find such a mapping. This study
                     paves the way for analysis and mining across social
                     networking sites, and facilitates the creation of novel
                     online services across sites. In particular,
                     recommending friends and advertising across networks,
                     analyzing information diffusion across sites, and
                     studying specific user behavior such as user migration
                     across sites in social media are one of the many areas
                     that can benefit from the results of this study.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {16},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Li:2015:RUC,
  author          = {Lei Li and Wei Peng and Saurabh Kataria and Tong Sun
                     and Tao Li},
  title           = {Recommending Users and Communities in Social Media},
  journal         = j-TKDD,
  volume          = {10},
  number          = {2},
  pages           = {17:1--17:??},
  month           = oct,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2757282},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Social media has become increasingly prevalent in the
                     last few years, not only enabling people to connect
                     with each other by social links, but also providing
                     platforms for people to share information and interact
                     over diverse topics. Rich user-generated information,
                     for example, users' relationships and daily posts, are
                     often available in most social media service websites.
                     Given such information, a challenging problem is to
                     provide reasonable user and community recommendation
                     for a target user, and consequently, help the target
                     user engage in the daily discussions and activities
                     with his/her friends or like-minded people. In this
                     article, we propose a unified framework of recommending
                     users and communities that utilizes the information in
                     social media. Given a user's profile or a set of
                     keywords as input, our framework is capable of
                     recommending influential users and topic-cohesive
                     interactive communities that are most relevant to the
                     given user or keywords. With the proposed framework,
                     users can find other individuals or communities sharing
                     similar interests, and then have more interaction with
                     these users or within the communities. We present a
                     generative topic model to discover user-oriented and
                     community-oriented topics simultaneously, which enables
                     us to capture the exact topical interests of users, as
                     well as the focuses of communities. Extensive
                     experimental evaluation and case studies on a dataset
                     collected from Twitter demonstrate the effectiveness of
                     our proposed framework compared with other
                     probabilistic-topic-model-based recommendation
                     methods.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {17},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Yu:2015:GGA,
  author          = {Rose Yu and Xinran He and Yan Liu},
  title           = {{GLAD}: Group Anomaly Detection in Social Media
                     Analysis},
  journal         = j-TKDD,
  volume          = {10},
  number          = {2},
  pages           = {18:1--18:??},
  month           = oct,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2811268},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Traditional anomaly detection on social media mostly
                     focuses on individual point anomalies while anomalous
                     phenomena usually occur in groups. Therefore, it is
                     valuable to study the collective behavior of
                     individuals and detect group anomalies. Existing group
                     anomaly detection approaches rely on the assumption
                     that the groups are known, which can hardly be true in
                     real world social media applications. In this article,
                     we take a generative approach by proposing a
                     hierarchical Bayes model: Group Latent Anomaly
                     Detection (GLAD) model. GLAD takes both pairwise and
                     point-wise data as input, automatically infers the
                     groups and detects group anomalies simultaneously. To
                     account for the dynamic properties of the social media
                     data, we further generalize GLAD to its dynamic
                     extension d-GLAD. We conduct extensive experiments to
                     evaluate our models on both synthetic and real world
                     datasets. The empirical results demonstrate that our
                     approach is effective and robust in discovering latent
                     groups and detecting group anomalies.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {18},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Chakrabarti:2015:BPL,
author = "Aniket Chakrabarti and Venu Satuluri and Atreya
Srivathsan and Srinivasan Parthasarathy",
title = "A {Bayesian} Perspective on Locality Sensitive Hashing
with Extensions for Kernel Methods",
journal = j-TKDD,
volume = "10",
number = "2",
pages = "19:1--19:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2778990",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Oct 26 17:19:18 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a collection of objects and an associated
similarity measure, the all-pairs similarity search
problem asks us to find all pairs of objects with
similarity greater than a certain user-specified
threshold. In order to reduce the number of candidates
to search, locality-sensitive hashing (LSH) based
indexing methods are very effective. However, most such
methods only use LSH for the first phase of similarity
search --- that is, efficient indexing for candidate
generation. In this article, we present BayesLSH, a
principled Bayesian algorithm for the subsequent phase
of similarity search --- performing candidate pruning
and similarity estimation using LSH. A simpler variant,
BayesLSH-Lite, which calculates similarities exactly,
is also presented. Our algorithms are able to quickly
prune away a large majority of the false positive
candidate pairs, leading to significant speedups over
baseline approaches. For BayesLSH, we also provide
probabilistic guarantees on the quality of the output,
both in terms of accuracy and recall. Finally, the
quality of BayesLSH's output can be easily tuned and
does not require any manual setting of the number of
hashes to use for similarity estimation, unlike
standard approaches. For two state-of-the-art candidate
generation algorithms, AllPairs and LSH, BayesLSH
enables significant speedups, typically in the range
$ 2 \times $--$ 20 \times $ for a wide variety of
datasets. We also extend the BayesLSH algorithm for
kernel methods --- in which the similarity between two
data objects is defined by a kernel function. Since the
embedding of data points in the transformed kernel
space is unknown, algorithms such as AllPairs which
rely on building inverted index structure for fast
similarity search do not work with kernel functions.
Exhaustive search across all possible pairs is also not
an option since the dataset can be huge and computing
the kernel values for each pair can be prohibitive. We
propose K-BayesLSH an all-pairs similarity search
problem for kernel functions. K-BayesLSH leverages a
recently proposed idea --- kernelized locality
sensitive hashing (KLSH) --- for hash bit computation
and candidate generation, and uses the aforementioned
BayesLSH idea for candidate pruning and similarity
estimation. We ran a broad spectrum of experiments on a
variety of datasets drawn from different domains and
with distinct kernels and find a speedup of
$ 2 \times $--$ 7 \times $ over vanilla KLSH.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "19",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2015:DAV,
author = "Yao Zhang and B. Aditya Prakash",
title = "Data-Aware Vaccine Allocation Over Large Networks",
journal = j-TKDD,
volume = "10",
number = "2",
pages = "20:1--20:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2803176",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Oct 26 17:19:18 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a graph, like a social/computer network or the
blogosphere, in which an infection (or meme or virus)
has been spreading for some time, how to select the $k$
best nodes for immunization/quarantining immediately?
Most previous works for controlling propagation (say
via immunization) have concentrated on developing
strategies for vaccination preemptively before the
start of the epidemic. While very useful to provide
insights into which baseline policies can best control
an infection, they may not be ideal to make real-time
decisions as the infection is progressing. In this
paper, we study how to immunize healthy nodes, in the
presence of already infected nodes. Efficient
algorithms for such a problem can help public-health
experts make more informed choices, tailoring their
decisions to the actual distribution of the epidemic on
the ground. First we formulate the Data-Aware
Vaccination problem, and prove it is NP-hard and also
that it is hard to approximate. Secondly, we propose
three effective polynomial-time heuristics DAVA,
DAVA-prune and DAVA-fast, of varying degrees of
efficiency and performance. Finally, we also
demonstrate the scalability and effectiveness of our
algorithms through extensive experiments on multiple
real networks including large epidemiology datasets
(containing millions of interactions). Our algorithms
show substantial gains of up to ten times more healthy
nodes at the end against many other intuitive and
nontrivial competitors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "20",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Rowe:2016:MUD,
  author          = {Matthew Rowe},
  title           = {Mining User Development Signals for Online Community
                     Churner Detection},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {21:1--21:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2798730},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Churners are users who stop using a given service
                     after previously signing up. In the domain of
                     telecommunications and video games, churners represent
                     a loss of revenue as a user leaving indicates that they
                     will no longer pay for the service. In the context of
                     online community platforms (e.g., community message
                     boards, social networking sites, question--answering
                     systems, etc.), the churning of a user can represent
                     different kinds of loss: of social capital, of
                     expertise, or of a vibrant individual who is a mediator
                     for interaction and communication. Detecting which
                     users are likely to churn from online communities,
                     therefore, enables community managers to offer
                     incentives to entice those users back; as retention is
                     less expensive than re-signing users up. In this
                     article, we tackle the task of detecting churners on
                     four online community platforms by mining user
                     development signals. These signals explain how users
                     have evolved along different dimensions (i.e., social
                     and lexical) relative to their prior behaviour and the
                     community in which they have interacted. We present a
                     linear model, based upon elastic-net regularisation,
                     that uses extracted features from the signals to detect
                     churners. Our evaluation of this model against several
                     state of the art baselines, including our own prior
                     work, empirically demonstrates the superior performance
                     that this approach achieves for several experimental
                     settings. This article presents a novel approach to
                     churn prediction that takes a different route from
                     existing approaches that are based on measuring static
                     social network properties of users (e.g., centrality,
                     in-degree, etc.).},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {21},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Prat-Perez:2016:PTT,
  author          = {Arnau Prat-P{\'e}rez and David Dominguez-Sal and
                     Josep-M. Brunat and Josep-Lluis Larriba-Pey},
  title           = {Put Three and Three Together: Triangle-Driven
                     Community Detection},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {22:1--22:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2775108},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Community detection has arisen as one of the most
                     relevant topics in the field of graph data mining due
                     to its applications in many fields such as biology,
                     social networks, or network traffic analysis. Although
                     the existing metrics used to quantify the quality of a
                     community work well in general, under some
                     circumstances, they fail at correctly capturing such
                     notion. The main reason is that these metrics consider
                     the internal community edges as a set, but ignore how
                     these actually connect the vertices of the community.
                     We propose the Weighted Community Clustering (WCC),
                     which is a new community metric that takes the triangle
                     instead of the edge as the minimal structural motif
                     indicating the presence of a strong relation in a
                     graph. We theoretically analyse WCC in depth and
                     formally prove, by means of a set of properties, that
                     the maximization of WCC guarantees communities with
                     cohesion and structure. In addition, we propose
                     Scalable Community Detection (SCD), a community
                     detection algorithm based on WCC, which is designed to
                     be fast and scalable on SMP machines, showing
                     experimentally that WCC correctly captures the concept
                     of community in social networks using real datasets.
                     Finally, using ground-truth data, we show that SCD
                     provides better quality than the best disjoint
                     community detection algorithms of the state of the art
                     while performing faster.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {22},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Guo:2016:MDM,
  author          = {Zhen Guo and Zhongfei (Mark) Zhang and Eric P. Xing
                     and Christos Faloutsos},
  title           = {Multimodal Data Mining in a Multimedia Database Based
                     on Structured Max Margin Learning},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {23:1--23:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2742549},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Mining knowledge from a multimedia database has
                     received increasing attentions recently since huge
                     repositories are made available by the development of
                     the Internet. In this article, we exploit the relations
                     among different modalities in a multimedia database and
                     present a framework for general multimodal data mining
                     problem where image annotation and image retrieval are
                     considered as the special cases. Specifically, the
                     multimodal data mining problem can be formulated as a
                     structured prediction problem where we learn the
                     mapping from an input to the structured and
                     interdependent output variables. In addition, in order
                     to reduce the demanding computation, we propose a new
                     max margin structure learning approach called Enhanced
                     Max Margin Learning (EMML) framework, which is much
                     more efficient with a much faster convergence rate than
                     the existing max margin learning methods, as verified
                     through empirical evaluations. Furthermore, we apply
                     EMML framework to develop an effective and efficient
                     solution to the multimodal data mining problem that is
                     highly scalable in the sense that the query response
                     time is independent of the database scale. The EMML
                     framework allows an efficient multimodal data mining
                     query in a very large scale multimedia database, and
                     excels many existing multimodal data mining methods in
                     the literature that do not scale up at all. The
                     performance comparison with a state-of-the-art
                     multimodal data mining method is reported for the
                     real-world image databases.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {23},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Myers:2016:DAK,
author = "Risa B. Myers and John C. Frenzel and Joseph R. Ruiz
and Christopher M. Jermaine",
title = "Do Anesthesiologists Know What They Are Doing?
{Mining} a Surgical Time-Series Database to Correlate
Expert Assessment with Outcomes",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "24:1--24:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2822897",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Anesthesiologists are taught to carefully manage
patient vital signs during surgery. Unfortunately,
there is little empirical evidence that vital sign
management, as currently practiced, is correlated with
patient outcomes. We seek to validate or repudiate
current clinical practice and determine whether or not
clinician evaluation of surgical vital signs correlate
with outcomes. Using a database of over 90,000 cases,
we attempt to determine whether those cases that
anesthesiologists would subjectively decide are ``low
quality'' are more likely to result in negative
outcomes. The problem reduces to one of
multi-dimensional time-series classification. Our
approach is to have a set of expert anesthesiologists
independently label a small number of training cases,
from which we build classifiers and label all 90,000
cases. We then use the labeling to search for
correlation with outcomes and compare the prevalence of
important 30-day outcomes between providers. To mimic
the providers' quality labels, we consider several
standard classification methods, such as dynamic time
warping in conjunction with a kNN classifier, as well
as complexity invariant distance, and a regression
based upon the feature extraction methods outlined by
Mao et al.\ 2012 (using features such as time-series
mean, standard deviation, skew, etc.). We also propose
a new feature selection mechanism that learns a hidden
Markov model to segment the time series; the fraction
of time that each series spends in each state is used
to label the series using a regression-based
classifier. In the end, we obtain strong, empirical
evidence that current best practice is correlated with
reduced negative patient outcomes. We also learn that
all of the experts were able to significantly separate
cases by outcome, with higher prevalence of negative
30-day outcomes in the cases labeled as ``low quality''
for almost all of the outcomes investigated.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "24",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Namata:2016:CGI,
  author          = {Galileo Mark Namata and Ben London and Lise Getoor},
  title           = {Collective Graph Identification},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {25:1--25:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2818378},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Data describing networks---such as communication
                     networks, transaction networks, disease transmission
                     networks, collaboration networks, etc.---are becoming
                     increasingly available. While observational data can be
                     useful, it often only hints at the actual underlying
                     process that governs interactions and attributes. For
                     example, an email communication network provides
                     insight into its users and their relationships, but is
                     not the same as the ``real'' underlying social network.
                     In this article, we introduce the problem of graph
                     identification, i.e., discovering the latent graph
                     structure underlying an observed network. We cast the
                     problem as a probabilistic inference task, in which we
                     must infer the nodes, edges, and node labels of a
                     hidden graph, based on evidence. This entails solving
                     several canonical problems in network analysis: entity
                     resolution (determining when two observations
                     correspond to the same entity), link prediction
                     (inferring the existence of links), and node labeling
                     (inferring hidden attributes). While each of these
                     subproblems has been well studied in isolation, here we
                     consider them as a single, collective task. We present
                     a simple, yet novel, approach to address all three
                     subproblems simultaneously. Our approach, which we
                     refer to as C$^3$, consists of a collection of Coupled
                     Collective Classifiers that are applied iteratively to
                     propagate inferred information among the subproblems.
                     We consider variants of C$^3$ using different learning
                     and inference techniques and empirically demonstrate
                     that C$^3$ is superior, both in terms of predictive
                     accuracy and running time, to state-of-the-art
                     probabilistic approaches on four real problems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {25},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Subbian:2016:MIU,
author = "Karthik Subbian and Charu Aggarwal and Jaideep
Srivastava",
title = "Mining Influencers Using Information Flows in Social
Streams",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "26:1--26:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2815625",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The problem of discovering information flow trends in
social networks has become increasingly relevant due to
the increasing amount of content in online social
networks, and its relevance as a tool for research into
the content trends analysis in the network. An
important part of this analysis is to determine the key
patterns of flow in the underlying network. Almost all
the work in this area has focused on fixed models of
the network structure, and edge-based transmission
between nodes. In this article, we propose a fully
content-centered model of flow analysis in networks, in
which the analysis is based on actual content
transmissions in the underlying social stream, rather
than a static model of transmission on the edges.
First, we introduce the problem of influence analysis
in the context of information flow in networks. We then
propose a novel algorithm InFlowMine to discover the
information flow patterns in the network and
demonstrate the effectiveness of the discovered
information flows using an influence mining
application. This application illustrates the
flexibility and effectiveness of our information flow
model to find topic- or network-specific influencers,
or their combinations. We empirically show that our
information flow mining approach is more effective and
efficient than the existing methods on a number of
different measures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "26",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Angiulli:2016:TGU,
author = "Fabrizio Angiulli and Fabio Fassetti",
title = "Toward Generalizing the Unification with Statistical
Outliers: The Gradient Outlier Factor Measure",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "27:1--27:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2829956",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In this work, we introduce a novel definition of
outlier, namely the Gradient Outlier Factor (or GOF),
with the aim to provide a definition that unifies with
the statistical one on some standard distributions but
has a different behavior in the presence of mixture
distributions. Intuitively, the GOF score measures the
probability to stay in the neighborhood of a certain
object. It is directly proportional to the density and
inversely proportional to the variation of the density.
We derive formal properties under which the GOF
definition unifies the statistical outlier definition
and show that the unification holds for some standard
distributions, while the GOF is able to capture tails
in the presence of different distributions even if
their densities sensibly differ. Moreover, we provide a
probabilistic interpretation of the GOF score, by means
of the notion of density of the data density.
Experimental results confirm that there are scenarios
in which the novel definition can be profitably
employed. To the best of our knowledge, except for
distance-based outlier, no other data mining outlier
definition has a so clearly established relationship
with statistical outliers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "27",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Koutra:2016:DPM,
author = "Danai Koutra and Neil Shah and Joshua T. Vogelstein
and Brian Gallagher and Christos Faloutsos",
title = "{DeltaCon}: Principled Massive-Graph Similarity
Function with Attribution",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "28:1--28:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2824443",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "How much has a network changed since yesterday? How
different is the wiring of Bob's brain (a left-handed
male) and Alice's brain (a right-handed female), and
how is it different? Graph similarity with given node
correspondence, i.e., the detection of changes in the
connectivity of graphs, arises in numerous settings. In
this work, we formally state the axioms and desired
properties of the graph similarity functions, and
evaluate when state-of-the-art methods fail to detect
crucial connectivity changes in graphs. We propose
DeltaCon, a principled, intuitive, and scalable
algorithm that assesses the similarity between two
graphs on the same nodes (e.g., employees of a company,
customers of a mobile carrier). In conjunction, we
propose DeltaCon-Attr, a related approach that enables
attribution of change or dissimilarity to responsible
nodes and edges. Experiments on various synthetic and
real graphs showcase the advantages of our method over
existing similarity measures. Finally, we employ
DeltaCon and DeltaCon-Attr on real applications: (a) we
classify people to groups of high and low creativity
based on their brain connectivity graphs, (b) do
temporal anomaly detection in the who-emails-whom Enron
graph and find the top culprits for the changes in the
temporal corporate email graph, and (c) recover pairs
of test-retest large brain scans ($\sim$17M edges, up
to 90M edges) for 21 subjects.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "28",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2016:MPA,
author = "Wayne Xin Zhao and Jinpeng Wang and Yulan He and
Ji-Rong Wen and Edward Y. Chang and Xiaoming Li",
title = "Mining Product Adopter Information from Online Reviews
for Improving Product Recommendation",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "29:1--29:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2842629",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We present in this article an automated framework that
extracts product adopter information from online
reviews and incorporates the extracted information into
feature-based matrix factorization for more effective
product recommendation. In specific, we propose a
bootstrapping approach for the extraction of product
adopters from review text and categorize them into a
number of different demographic categories. The
aggregated demographic information of many product
adopters can be used to characterize both products and
users in the form of distributions over different
demographic categories. We further propose a
graph-based method to iteratively update user- and
product-related distributions more reliably in a
heterogeneous user--product graph and incorporate them
as features into the matrix factorization approach for
product recommendation. Our experimental results on a
large dataset crawled from JingDong, the largest B2C
e-commerce website in China, show that our proposed
framework outperforms a number of competitive baselines
for product recommendation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "29",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Duarte:2016:AMR,
author = "Jo{\~a}o Duarte and Jo{\~a}o Gama and Albert Bifet",
title = "Adaptive Model Rules From High-Speed Data Streams",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "30:1--30:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2829955",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Decision rules are one of the most expressive and
interpretable models for machine learning. In this
article, we present Adaptive Model Rules (AMRules), the
first stream rule learning algorithm for regression
problems. In AMRules, the antecedent of a rule is a
conjunction of conditions on the attribute values, and
the consequent is a linear combination of the
attributes. In order to maintain a regression model
compatible with the most recent state of the process
generating data, each rule uses a Page-Hinkley test to
detect changes in this process and react to changes by
pruning the rule set. Online learning might be strongly
affected by outliers. AMRules is also equipped with
outliers detection mechanisms to avoid model adaption
using anomalous examples. In the experimental section,
we report the results of AMRules on benchmark
regression problems, and compare the performance of our
system with other streaming regression algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "30",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lu:2016:SCB,
author = "Faming Lu and Qingtian Zeng and Hua Duan",
title = "Synchronization-Core-Based Discovery of Processes with
Decomposable Cyclic Dependencies",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "31:1--31:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2845086",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Traditional process discovery techniques mine process
models based upon event traces giving little
consideration to workflow relevant data recorded in
event logs. The neglect of such information usually
leads to incorrect discovered models, especially when
activities have decomposable cyclic dependencies. To
address this problem, the recorded workflow relevant
data and decision tree learning technique are utilized
to classify cases into case clusters. Each case cluster
contains causality and concurrency activity
dependencies only. Then, a set of activity ordering
relations are derived based on case clusters. And a
synchronization-core-based process model is discovered
from the ordering relations and composite cases.
Finally, the discovered model is transformed to a BPMN
model. The proposed approach is validated with a
medical treatment process and an open event log.
Meanwhile, a prototype system is presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "31",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2016:EAW,
author = "Yashu Liu and Jie Wang and Jieping Ye",
title = "An Efficient Algorithm For Weak Hierarchical Lasso",
journal = j-TKDD,
volume = "10",
number = "3",
pages = "32:1--32:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2791295",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 25 05:56:34 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Linear regression is a widely used tool in data mining
and machine learning. In many applications, fitting a
regression model with only linear effects may not be
sufficient for predictive or explanatory purposes. One
strategy that has recently received increasing
attention in statistics is to include feature
interactions to capture the nonlinearity in the
regression model. Such model has been applied
successfully in many biomedical applications. One major
challenge in the use of such model is that the data
dimensionality is significantly higher than the
original data, resulting in the small sample size large
dimension problem. Recently, weak hierarchical Lasso, a
sparse interaction regression model, is proposed that
produces a sparse and hierarchical structured estimator
by exploiting the Lasso penalty and a set of
hierarchical constraints. However, the hierarchical
constraints make it a non-convex problem and the
existing method finds the solution to its convex
relaxation, which needs additional conditions to
guarantee the hierarchical structure. In this article,
we propose to directly solve the non-convex weak
hierarchical Lasso by making use of the General
Iterative Shrinkage and Thresholding (GIST)
optimization framework, which has been shown to be
efficient for solving non-convex sparse formulations.
The key step in GIST is to compute a sequence of
proximal operators. One of our key technical
contributions is to show that the proximal operator
associated with the non-convex weak hierarchical Lasso
admits a closed-form solution. However, a naive
approach for solving each subproblem of the proximal
operator leads to a quadratic time complexity, which is
not desirable for large-size problems. We have
conducted extensive experiments on both synthetic and
real datasets. Results show that our proposed algorithm
is much more efficient and effective than its convex
relaxation. To this end, we further develop an
efficient algorithm for computing the subproblems with
a linearithmic time complexity. In addition, we extend
the technique to perform the optimization-based
hierarchical testing of pairwise interactions for
binary classification problems, which is essentially
the proximal operator associated with weak hierarchical
Lasso. Simulation studies show that the non-convex
hierarchical testing framework outperforms the convex
relaxation when a hierarchical structure exists between
main effects and interactions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "32",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2016:ISI,
author = "Wei Wang and Jure Leskovec",
title = "Introduction to the Special Issue of Best Papers in
{ACM SIGKDD 2014}",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "33:1--33:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2936718",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "33",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2016:PSP,
author = "Silei Xu and John C. S. Lui",
title = "Product Selection Problem: Improve Market Share by
Learning Consumer Behavior",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "34:1--34:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2753764",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "It is often crucial for manufacturers to decide what
products to produce so that they can increase their
market share in an increasingly fierce market. To
decide which products to produce, manufacturers need to
analyze the consumers' requirements and how consumers
make their purchase decisions so that the new products
will be competitive in the market. In this paper, we
first present a general distance-based product adoption
model to capture consumers' purchase behavior. Using
this model, various distance metrics can be used to
describe different real life purchase behavior. We then
provide a learning algorithm to decide which set of
distance metrics one should use when we are given some
accessible historical purchase data. Based on the
product adoption model, we formalize the $k$ most
marketable products (or $k$-MMP) selection problem and
formally prove that the problem is NP-hard. To tackle
this problem, we propose an efficient greedy-based
approximation algorithm with a provable solution
guarantee. Using submodularity analysis, we prove that
our approximation algorithm can achieve at least 63\%
of the optimal solution. We apply our algorithm on both
synthetic datasets and real-world datasets
(TripAdvisor.com), and show that our algorithm can
easily achieve five or more orders of speedup over the
exhaustive search and achieve about 96\% of the optimal
solution on average. Our experiments also demonstrate
the robustness of our distance metric learning method,
and illustrate how one can adopt it to improve the
accuracy of product selection.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "34",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2016:CSB,
author = "Meng Jiang and Peng Cui and Alex Beutel and Christos
Faloutsos and Shiqiang Yang",
title = "Catching Synchronized Behaviors in Large Networks: a
Graph Mining Approach",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "35:1--35:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2746403",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a directed graph of millions of nodes, how can
we automatically spot anomalous, suspicious nodes
judging only from their connectivity patterns?
Suspicious graph patterns show up in many applications,
from Twitter users who buy fake followers, manipulating
the social network, to botnet members performing
distributed denial of service attacks, disturbing the
network traffic graph. We propose a fast and effective
method, CatchSync, which exploits two of the tell-tale
signs left in graphs by fraudsters: (a) synchronized
behavior: suspicious nodes have extremely similar
behavior patterns because they are often required to
perform some task together (such as follow the same
user); and (b) rare behavior: their connectivity
patterns are very different from the majority. We
introduce novel measures to quantify both concepts
(``synchronicity'' and ``normality'') and we propose a
parameter-free algorithm that works on the resulting
synchronicity-normality plots. Thanks to careful
design, CatchSync has the following desirable
properties: (a) it is scalable to large datasets, being
linear in the graph size; (b) it is parameter free; and
(c) it is side-information-oblivious: it can operate
using only the topology, without needing labeled data,
nor timing information, and the like, while still
capable of using side information if available. We
applied CatchSync on three large, real datasets,
1-billion-edge Twitter social graph, 3-billion-edge,
and 12-billion-edge Tencent Weibo social graphs, and
several synthetic ones; CatchSync consistently
outperforms existing competitors, both in detection
accuracy by 36\% on Twitter and 20\% on Tencent Weibo,
as well as in speed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "35",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wei:2016:HTH,
author = "Ying Wei and Yangqiu Song and Yi Zhen and Bo Liu and
Qiang Yang",
title = "Heterogeneous Translated Hashing: a Scalable Solution
Towards Multi-Modal Similarity Search",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "36:1--36:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2744204",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Multi-modal similarity search has attracted
considerable attention to meet the need of information
retrieval across different types of media. To enable
efficient multi-modal similarity search in large-scale
databases recently, researchers start to study
multi-modal hashing. Most of the existing methods are
applied to search across multi-views among which
explicit correspondence is provided. Given a
multi-modal similarity search task, we observe that
abundant multi-view data can be found on the Web which
can serve as an auxiliary bridge. In this paper, we
propose a Heterogeneous Translated Hashing (HTH) method
with such auxiliary bridge incorporated not only to
improve current multi-view search but also to enable
similarity search across heterogeneous media which have
no direct correspondence. HTH provides more flexible
and discriminative ability by embedding heterogeneous
media into different Hamming spaces, compared to almost
all existing methods that map heterogeneous data in a
common Hamming space. We formulate a joint optimization
model to learn hash functions embedding heterogeneous
media into different Hamming spaces, and a translator
aligning different Hamming spaces. The extensive
experiments on two real-world datasets, one publicly
available dataset of Flickr, and the other
MIRFLICKR-Yahoo Answers dataset, highlight the
effectiveness and efficiency of our algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "36",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tong:2016:GES,
author = "Hanghang Tong and Fei Wang and Munmun De Choudhury and
Zoran Obradovic",
title = "Guest Editorial: Special Issue on Connected Health at
Big Data Era {(BigChat)}: a {TKDD} Special Issue",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "37:1--37:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2912122",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "37",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xiong:2016:KIT,
author = "Feiyu Xiong and Moshe Kam and Leonid Hrebien and
Beilun Wang and Yanjun Qi",
title = "Kernelized Information-Theoretic Metric Learning for
Cancer Diagnosis Using High-Dimensional Molecular
Profiling Data",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "38:1--38:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2789212",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "With the advancement of genome-wide monitoring
technologies, molecular expression data have become
widely used for diagnosing cancer through tumor or
blood samples. When mining molecular signature data,
the process of comparing samples through an adaptive
distance function is fundamental but difficult, as such
datasets are normally heterogeneous and high
dimensional. In this article, we present kernelized
information-theoretic metric learning (KITML)
algorithms that optimize a distance function to tackle
the cancer diagnosis problem and scale to high
dimensionality. By learning a nonlinear transformation
in the input space implicitly through kernelization,
KITML permits efficient optimization, low storage, and
improved learning of distance metric. We propose two
novel applications of KITML for diagnosing cancer using
high-dimensional molecular profiling data: (1) for
sample-level cancer diagnosis, the learned metric is
used to improve the performance of $k$-nearest neighbor
classification; and (2) for estimating the severity
level or stage of a group of samples, we propose a
novel set-based ranking approach to extend KITML. For
the sample-level cancer classification task, we have
evaluated on 14 cancer gene microarray datasets and
compared with eight other state-of-the-art approaches.
The results show that our approach achieves the best
overall performance for the task of
molecular-expression-driven cancer sample diagnosis.
For the group-level cancer stage estimation, we test
the proposed set-KITML approach using three multi-stage
cancer microarray datasets, and correctly estimated the
stages of sample groups for all three studies.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "38",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2016:JML,
author = "Pei Yang and Hongxia Yang and Haoda Fu and Dawei Zhou
and Jieping Ye and Theodoros Lappas and Jingrui He",
title = "Jointly Modeling Label and Feature Heterogeneity in
Medical Informatics",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "39:1--39:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2768831",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Multiple types of heterogeneity including label
heterogeneity and feature heterogeneity often co-exist
in many real-world data mining applications, such as
diabetes treatment classification, gene functionality
prediction, and brain image analysis. To effectively
leverage such heterogeneity, in this article, we
propose a novel graph-based model for Learning with
both Label and Feature heterogeneity, namely L$^2$F. It
models the label correlation by requiring that any two
label-specific classifiers behave similarly on the same
views if the associated labels are similar, and imposes
the view consistency by requiring that view-based
classifiers generate similar predictions on the same
examples. The objective function for L$^2$F is jointly
convex. To solve the optimization problem, we propose
an iterative algorithm, which is guaranteed to converge
to the global optimum. One appealing feature of L$^2$F
is that it is capable of handling data with missing
views and labels. Furthermore, we analyze its
generalization performance based on Rademacher
complexity, which sheds light on the benefits of
jointly modeling the label and feature heterogeneity.
Experimental results on various biomedical datasets
show the effectiveness of the proposed approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "39",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2016:MDN,
author = "Yubao Wu and Xiaofeng Zhu and Li Li and Wei Fan and
Ruoming Jin and Xiang Zhang",
title = "Mining Dual Networks: Models, Algorithms, and
Applications",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "40:1--40:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2785970",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Finding the densest subgraph in a single graph is a
fundamental problem that has been extensively studied.
In many emerging applications, there exist dual
networks. For example, in genetics, it is important to
use protein interactions to interpret genetic
interactions. In this application, one network
represents physical interactions among nodes, for
example, protein--protein interactions, and another
network represents conceptual interactions, for
example, genetic interactions. Edges in the conceptual
network are usually derived based on certain
correlation measure or statistical test measuring the
strength of the interaction. Two nodes with strong
conceptual interaction may not have direct physical
interaction. In this article, we propose the novel
dual-network model and investigate the problem of
finding the densest connected subgraph (DCS), which has
the largest density in the conceptual network and is
also connected in the physical network. Density in the
conceptual network represents the average strength of
the measured interacting signals among the set of
nodes. Connectivity in the physical network shows how
they interact physically. Such pattern cannot be
identified using the existing algorithms for a single
network. We show that even though finding the densest
subgraph in a single network is polynomial time
solvable, the DCS problem is NP-hard. We develop a
two-step approach to solve the DCS problem. In the
first step, we effectively prune the dual networks,
while guarantee that the optimal solution is contained
in the remaining networks. For the second step, we
develop two efficient greedy methods based on different
search strategies to find the DCS. Different variations
of the DCS problem are also studied. We perform
extensive experiments on a variety of real and
synthetic dual networks to evaluate the effectiveness
and efficiency of the developed methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "40",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cui:2016:BOQ,
author = "Licong Cui and Shiqiang Tao and Guo-Qiang Zhang",
title = "Biomedical Ontology Quality Assurance Using a Big Data
Approach",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "41:1--41:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2768830",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "This article presents recent progresses made in using
scalable cloud computing environment, Hadoop and
MapReduce, to perform ontology quality assurance (OQA),
and points to areas of future opportunity. The standard
sequential approach used for implementing OQA methods
can take weeks if not months for exhaustive analyses
for large biomedical ontological systems. With OQA
methods newly implemented using massively parallel
algorithms in the MapReduce framework, several orders
of magnitude in speed-up can be achieved (e.g., from
three months to three hours). Such dramatically reduced
time makes it feasible not only to perform exhaustive
structural analysis of large ontological hierarchies,
but also to systematically track structural changes
between versions for evolutional analysis. As an
exemplar, progress is reported in using MapReduce to
perform evolutional analysis and visualization on the
Systemized Nomenclature of Medicine-Clinical Terms
(SNOMED CT), a prominent clinical terminology system.
Future opportunities in three areas are described: one
is to extend the scope of MapReduce-based approach to
existing OQA methods, especially for automated
exhaustive structural analysis. The second is to apply
our proposed MapReduce Pipeline for Lattice-based
Evaluation (MaPLE) approach, demonstrated as an
exemplar method for SNOMED CT, to other biomedical
ontologies. The third area is to develop interfaces for
reviewing results obtained by OQA methods and for
visualizing ontological alignment and evolution, which
can also take advantage of cloud computing technology
to systematically pre-compute computationally intensive
jobs in order to increase performance during user
interactions with the visualization interface. Advances
in these directions are expected to better support the
ontological engineering lifecycle.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "41",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Rayana:2016:LMB,
author = "Shebuti Rayana and Leman Akoglu",
title = "Less is More: Building Selective Anomaly Ensembles",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "42:1--42:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2890508",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Ensemble learning for anomaly detection has been
barely studied, due to difficulty in acquiring ground
truth and the lack of inherent objective functions. In
contrast, ensemble approaches for classification and
clustering have been studied and effectively used for
long. Our work taps into this gap and builds a new
ensemble approach for anomaly detection, with
application to event detection in temporal graphs as
well as outlier detection in no-graph settings. It
handles and combines multiple heterogeneous detectors
to yield improved and robust performance. Importantly,
trusting results from all the constituent detectors may
deteriorate the overall performance of the ensemble, as
some detectors could provide inaccurate results
depending on the type of data in hand and the
underlying assumptions of a detector. This suggests
that combining the detectors selectively is key to
building effective anomaly ensembles---hence ``less is
more''. In this paper we propose a novel ensemble
approach called SELECT for anomaly detection, which
automatically and systematically selects the results
from constituent detectors to combine in a fully
unsupervised fashion. We apply our method to event
detection in temporal graphs and outlier detection in
multi-dimensional point data (no-graph), where SELECT
successfully utilizes five base detectors and seven
consensus methods under a unified ensemble framework.
We provide extensive quantitative evaluation of our
approach for event detection on five real-world
datasets (four with ground truth events), including
Enron email communications, RealityMining SMS and phone
call records, New York Times news corpus, and World Cup
2014 Twitter news feed. We also provide results for
outlier detection on seven real-world multi-dimensional
point datasets from UCI Machine Learning Repository.
Thanks to its selection mechanism, SELECT yields
superior performance compared to the individual
detectors alone, the full ensemble (naively combining
all results), an existing diversity-based ensemble, and
an existing weighted ensemble approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "42",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhu:2016:CCS,
author = "Yada Zhu and Jingrui He",
title = "Co-Clustering Structural Temporal Data with
Applications to Semiconductor Manufacturing",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "43:1--43:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2875427",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Recent years have witnessed data explosion in
semiconductor manufacturing due to advances in
instrumentation and storage techniques. The large
amount of data associated with process variables
monitored over time form a rich reservoir of
information, which can be used for a variety of
purposes, such as anomaly detection, quality control,
and fault diagnostics. In particular, following the
same recipe for a certain Integrated Circuit device,
multiple tools and chambers can be deployed for the
production of this device, during which multiple time
series can be collected, such as temperature,
impedance, gas flow, electric bias, etc. These time
series naturally fit into a two-dimensional array
(matrix), i.e., each element in this array corresponds
to a time series for one process variable from one
chamber. To leverage the rich structural information in
such temporal data, in this article, we propose a novel
framework named C-Struts to simultaneously cluster on
the two dimensions of this array. In this framework, we
interpret the structural information as a set of
constraints on the cluster membership, introduce an
auxiliary probability distribution accordingly, and
design an iterative algorithm to assign each time
series to a certain cluster on each dimension.
Furthermore, we establish the equivalence between
C-Struts and a generic optimization problem, which is
able to accommodate various distance functions.
Extensive experiments on synthetic, benchmark, as well
as manufacturing datasets demonstrate the effectiveness
of the proposed method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "43",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tahani:2016:IDD,
author = "Maryam Tahani and Ali M. A. Hemmatyar and Hamid R.
Rabiee and Maryam Ramezani",
title = "Inferring Dynamic Diffusion Networks in Online Media",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "44:1--44:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2882968",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Online media play an important role in information
societies by providing a convenient infrastructure for
different processes. Information diffusion that is a
fundamental process taking place on social and
information networks has been investigated in many
studies. Research on information diffusion in these
networks faces two main challenges: (1) In most cases,
diffusion takes place on an underlying network, which
is latent and its structure is unknown. (2) This latent
network is not fixed and changes over time. In this
article, we investigate the diffusion network
extraction (DNE) problem when the underlying network is
dynamic and latent. We model the diffusion behavior
(existence probability) of each edge as a stochastic
process and utilize the Hidden Markov Model (HMM) to
discover the most probable diffusion links according to
the current observation of the diffusion process, which
is the infection time of nodes and the past diffusion
behavior of links. We evaluate the performance of our
Dynamic Diffusion Network Extraction (DDNE) method, on
both synthetic and real datasets. Experimental results
show that the performance of the proposed method is
independent of the cascade transmission model and
outperforms the state-of-the-art method in terms of
F-measure.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "44",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Koh:2016:URP,
author = "Yun Sing Koh and Sri Devi Ravana",
title = "Unsupervised Rare Pattern Mining: a Survey",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "45:1--45:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2898359",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Association rule mining was first introduced to
examine patterns among frequent items. The original
motivation for seeking these rules arose from the need to
examine customer purchasing behaviour in supermarket
transaction data. It seeks to identify combinations of
items or itemsets, whose presence in a transaction
affects the likelihood of the presence of another
specific item or itemsets. In recent years, there has
been an increasing demand for rare association rule
mining. Detecting rare patterns in data is a vital
task, with numerous high-impact applications including
medical, finance, and security. This survey aims to
provide a general, comprehensive, and structured
overview of the state-of-the-art methods for rare
pattern mining. We investigate the problems in finding
rare rules using traditional association rule mining.
As rare association rule mining has not been well
explored, there is still specific groundwork that needs
to be established. We will discuss some of the major
issues in rare association rule mining and also look at
current algorithms. As a contribution, we give a
general framework for categorizing algorithms: Apriori
and Tree based. We highlight the differences between
these methods. Finally, we present several real-world
applications using rare pattern mining in diverse
domains. We conclude our survey with a discussion on
open and practical challenges in the field.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "45",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cheng:2016:CFR,
author = "Wei Cheng and Zhishan Guo and Xiang Zhang and Wei
Wang",
title = "{CGC}: a Flexible and Robust Approach to Integrating
Co-Regularized Multi-Domain Graph for Clustering",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "46:1--46:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2903147",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Multi-view graph clustering aims to enhance clustering
performance by integrating heterogeneous information
collected in different domains. Each domain provides a
different view of the data instances. Leveraging
cross-domain information has been demonstrated to be an
effective way to achieve better clustering results.
Despite the previous success, existing multi-view graph
clustering methods usually assume that different views
are available for the same set of instances. Thus,
instances in different domains can be treated as having
strict one-to-one relationship. In many real-life
applications, however, data instances in one domain may
correspond to multiple instances in another domain.
Moreover, relationships between instances in different
domains may be associated with weights based on prior
(partial) knowledge. In this article, we propose a
flexible and robust framework, Co-regularized Graph
Clustering (CGC), based on non-negative matrix
factorization (NMF), to tackle these challenges. CGC
has several advantages over the existing methods.
First, it supports many-to-many cross-domain instance
relationship. Second, it incorporates weight on
cross-domain relationship. Third, it allows partial
cross-domain mapping so that graphs in different
domains may have different sizes. Finally, it provides
users with the extent to which the cross-domain
instance relationship violates the in-domain clustering
structure, and thus enables users to re-evaluate the
consistency of the relationship. We develop an
efficient optimization method that guarantees to find
the global optimal solution with a given confidence
requirement. The proposed method can automatically
identify noisy domains and assign smaller weights to
them. This helps to obtain optimal graph partition for
the focused domain. Extensive experimental results on
UCI benchmark datasets, newsgroup datasets, and
biological interaction networks demonstrate the
effectiveness of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "46",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shen:2016:SPO,
author = "Chih-Ya Shen and De-Nian Yang and Wang-Chien Lee and
Ming-Syan Chen",
title = "Spatial-Proximity Optimization for Rapid Task Group
Deployment",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "47:1--47:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2818714",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Spatial proximity is one of the most important factors
for the quick deployment of the task groups in various
time-sensitive missions. This article proposes a new
spatial query, Spatio-Social Team Query (SSTQ), that
forms a strong task group by considering (1) the
group's spatial distance (i.e., transportation time),
(2) skills of the candidate group members, and (3)
social rapport among the candidates. Efficient
processing of SSTQ is very challenging, because the
aforementioned spatial, skill, and social factors need
to be carefully examined. In this article, therefore,
we first formulate two subproblems of SSTQ, namely
Hop-Constrained Team Problem (HCTP) and
Connection-Oriented Team Query (COTQ). HCTP is a
decision problem that considers only social and skill
dimensions. We prove that HCTP is NP-Complete.
Moreover, based on the hardness of HCTP, we prove that
SSTQ is NP-Hard and inapproximable within any factor.
On the other hand, COTQ is a special case of SSTQ that
relaxes the social constraint. We prove that COTQ is
NP-Hard and propose an approximation algorithm for
COTQ, namely COTprox. Furthermore, based on the
observations on COTprox, we devise an approximation
algorithm, SSTprox, with a guaranteed error bound for
SSTQ. Finally, to efficiently obtain the optimal
solution to SSTQ for small instances, we design two
efficient algorithms, SpatialFirst and SkillFirst, with
different scenarios in mind. These two algorithms
incorporate various effective ordering and pruning
techniques to reduce the search space for answering
SSTQ. Experimental results on real datasets indicate
that the proposed algorithms can efficiently answer
SSTQ under various parameter settings.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "47",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2016:FDV,
author = "Zhiwen Yu and Zhitao Wang and Liming Chen and Bin Guo
and Wenjie Li",
title = "Featuring, Detecting, and Visualizing Human Sentiment
in {Chinese} Micro-Blog",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "48:1--48:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2821513",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Micro-blog has been increasingly used for the public
to express their opinions, and for organizations to
detect public sentiment about social events or public
policies. In this article, we examine and identify the
key problems of this field, focusing particularly on
the characteristics of innovative words, multi-media
elements, and hierarchical structure of Chinese
``Weibo.'' Based on the analysis, we propose a novel
approach and develop associated theoretical and
technological methods to address these problems. These
include a new sentiment word mining method based on
three wording metrics and point-wise information, a
rule set model for analyzing sentiment features of
different linguistic components, and the corresponding
methodology for calculating sentiment on
multi-granularity considering emoticon elements as
auxiliary affective factors. We evaluate our new word
discovery and sentiment detection methods on a
real-life Chinese micro-blog dataset. Initial results
show that our new diction can improve sentiment
detection, and they demonstrate that our multi-level
rule set method is more effective, with the average
accuracy being 10.2\% and 1.5\% higher than two
existing methods for Chinese micro-blog sentiment
analysis. In addition, we exploit visualization
techniques to study the relationships between online
sentiment and real life. The visualization of detected
sentiment can help depict temporal patterns and spatial
discrepancy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "48",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2016:EOL,
author = "Chen Chen and Hanghang Tong and B. Aditya Prakash and
Tina Eliassi-Rad and Michalis Faloutsos and Christos
Faloutsos",
title = "Eigen-Optimization on Large Graphs by Edge
Manipulation",
journal = j-TKDD,
volume = "10",
number = "4",
pages = "49:1--49:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2903148",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:29 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Large graphs are prevalent in many applications and
enable a variety of information dissemination
processes, e.g., meme, virus, and influence
propagation. How can we optimize the underlying graph
structure to affect the outcome of such dissemination
processes in a desired way (e.g., stop a virus
propagation, facilitate the propagation of a piece of
good idea, etc.)? Existing research suggests that the
leading eigenvalue of the underlying graph is the key
metric in determining the so-called epidemic threshold
for a variety of dissemination models. In this paper,
we study the problem of how to optimally place a set of
edges (e.g., edge deletion and edge addition) to
optimize the leading eigenvalue of the underlying
graph, so that we can guide the dissemination process
in a desired way. We propose effective, scalable
algorithms for edge deletion and edge addition,
respectively. In addition, we reveal the intrinsic
relationship between edge deletion and node deletion
problems. Experimental results validate the
effectiveness and efficiency of the proposed
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "49",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2016:STR,
author = "Zhiwen Yu and Miao Tian and Zhu Wang and Bin Guo and
Tao Mei",
title = "Shop-Type Recommendation Leveraging the Data from
Social Media and Location-Based Services",
journal = j-TKDD,
volume = "11",
number = "1",
pages = "1:1--1:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930671",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:30 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "It is an important yet challenging task for investors
to determine the most suitable type of shop (e.g.,
restaurant, fashion) for a newly opened store.
Traditional ways are predominantly field surveys and
empirical estimation, which are not effective as they
lack shop-related data. As social media and
location-based services (LBS) are becoming more and
more pervasive, user-generated data from these
platforms are providing rich information not only about
individual consumption experiences, but also about shop
attributes. In this paper, we investigate the
recommendation of shop types for a given location, by
leveraging heterogeneous data that are mainly
historical user preferences and location context from
social media and LBS. Our goal is to select the most
suitable shop type, seeking to maximize the number of
customers served from a candidate set of types. We
propose a novel bias learning matrix factorization
method with feature fusion for shop popularity
prediction. Features are defined and extracted from two
perspectives: location, where features are closely
related to location characteristics, and commercial,
where features are about the relationships between
shops in the neighborhood. Experimental results show
that the proposed method outperforms state-of-the-art
solutions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{McDowell:2016:LNA,
author = "Luke K. McDowell and David W. Aha",
title = "Leveraging Neighbor Attributes for Classification in
Sparsely Labeled Networks",
journal = j-TKDD,
volume = "11",
number = "1",
pages = "2:1--2:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2898358",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:30 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Many analysis tasks involve linked nodes, such as
people connected by friendship links. Research on
link-based classification (LBC) has studied how to
leverage these connections to improve classification
accuracy. Most such prior research has assumed the
provision of a densely labeled training network.
Instead, this article studies the common and
challenging case when LBC must use a single sparsely
labeled network for both learning and inference, a case
where existing methods often yield poor accuracy. To
address this challenge, we introduce a novel method
that enables prediction via ``neighbor attributes,''
which were briefly considered by early LBC work but
then abandoned due to perceived problems. We then
explain, using both extensive experiments and loss
decomposition analysis, how using neighbor attributes
often significantly improves accuracy. We further show
that using appropriate semi-supervised learning (SSL)
is essential to obtaining the best accuracy in this
domain and that the gains of neighbor attributes remain
across a range of SSL choices and data conditions.
Finally, given the challenges of label sparsity for LBC
and the impact of neighbor attributes, we show that
multiple previous studies must be re-considered,
including studies regarding the best model features,
the impact of noisy attributes, and strategies for
active learning.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chang:2016:CSP,
author = "Xiaojun Chang and Feiping Nie and Yi Yang and Chengqi
Zhang and Heng Huang",
title = "Convex Sparse {PCA} for Unsupervised Feature
Learning",
journal = j-TKDD,
volume = "11",
number = "1",
pages = "3:1--3:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2910585",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:30 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Principal component analysis (PCA) has been widely
applied to dimensionality reduction and data
pre-processing for different applications in
engineering, biology, social science, and the like.
Classical PCA and its variants seek for linear
projections of the original variables to obtain the
low-dimensional feature representations with maximal
variance. One limitation is that it is difficult to
interpret the results of PCA. Besides, the classical
PCA is vulnerable to certain noisy data. In this paper,
we propose a Convex Sparse Principal Component Analysis
(CSPCA) algorithm and apply it to feature learning.
First, we show that PCA can be formulated as a low-rank
regression optimization problem. Based on the
discussion, the $ l_{2, 1}$-norm minimization is
incorporated into the objective function to make the
regression coefficients sparse, thereby robust to the
outliers. Also, based on the sparse model used in
CSPCA, an optimal weight is assigned to each of the
original features, which in turn provides the output
with good interpretability. With the output of our
CSPCA, we can effectively analyze the importance of
each feature under the PCA criteria. Our new objective
function is convex, and we propose an iterative
algorithm to optimize it. We apply the CSPCA algorithm
to feature selection and conduct extensive experiments
on seven benchmark datasets. Experimental results
demonstrate that the proposed algorithm outperforms
state-of-the-art unsupervised feature selection
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2016:LLR,
author = "Ou Wu and Qiang You and Fen Xia and Lei Ma and Weiming
Hu",
title = "Listwise Learning to Rank from Crowds",
journal = j-TKDD,
volume = "11",
number = "1",
pages = "4:1--4:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2910586",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:30 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Learning to rank has received great attention in
recent years as it plays a crucial role in many
applications such as information retrieval and data
mining. The existing concept of learning to rank
assumes that each training instance is associated with
a reliable label. However, in practice, this assumption
does not necessarily hold true as it may be infeasible
or remarkably expensive to obtain reliable labels for
many learning to rank applications. Therefore, a
feasible approach is to collect labels from crowds and
then learn a ranking function from crowdsourcing
labels. This study explores the listwise learning to
rank with crowdsourcing labels obtained from multiple
annotators, who may be unreliable. A new probabilistic
ranking model is first proposed by combining two
existing models. Subsequently, a ranking function is
trained by proposing a maximum likelihood learning
approach, which estimates ground-truth labels and
annotator expertise, and trains the ranking function
iteratively. In practical crowdsourcing machine
learning, valuable side information (e.g., professional
grades) about involved annotators is normally
attainable. Therefore, this study also investigates
learning to rank from crowd labels when side
information on the expertise of involved annotators is
available. In particular, three basic types of side
information are investigated, and corresponding
learning algorithms are consequently introduced.
Further, the top-$k$ learning to rank from crowdsourcing
labels is explored to deal with long training ranking
lists. The proposed algorithms are tested on both
synthetic and real-world data. Results reveal that the
maximum likelihood estimation approach significantly
outperforms the average approach and existing
crowdsourcing regression methods. The performances of
the proposed algorithms are comparable to those of the
learning model in consideration of reliable labels. The
results of the investigation further indicate that side
information is helpful in inferring both ranking
functions and expertise degrees of annotators.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shao:2016:SCI,
author = "Junming Shao and Qinli Yang and Hoang-Vu Dang and
Bertil Schmidt and Stefan Kramer",
title = "Scalable Clustering by Iterative Partitioning and
Point Attractor Representation",
journal = j-TKDD,
volume = "11",
number = "1",
pages = "5:1--5:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2934688",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:30 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Clustering very large datasets while preserving
cluster quality remains a challenging data-mining task
to date. In this paper, we propose an effective
scalable clustering algorithm for large datasets that
builds upon the concept of synchronization. Inherited
from the powerful concept of synchronization, the
proposed algorithm, CIPA (Clustering by Iterative
Partitioning and Point Attractor Representations), is
capable of handling very large datasets by iteratively
partitioning them into thousands of subsets and
clustering each subset separately. Using dynamic
clustering by synchronization, each subset is then
represented by a set of point attractors and outliers.
Finally, CIPA identifies the cluster structure of the
original dataset by clustering the newly generated
dataset consisting of point attractors and outliers
from all subsets. We demonstrate that our new scalable
clustering approach has several attractive benefits:
(a) CIPA faithfully captures the cluster structure of
the original data by performing clustering on each
separate data iteratively instead of using any sampling
or statistical summarization technique. (b) It allows
clustering very large datasets efficiently with high
cluster quality. (c) CIPA is parallelizable and also
suitable for distributed data. Extensive experiments
demonstrate the effectiveness and efficiency of our
approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Grabocka:2016:LTS,
author = "Josif Grabocka and Nicolas Schilling and Lars
Schmidt-Thieme",
title = "Latent Time-Series Motifs",
journal = j-TKDD,
volume = "11",
number = "1",
pages = "6:1--6:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2940329",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Aug 29 07:28:30 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Motifs are the most repetitive/frequent patterns of a
time-series. The discovery of motifs is crucial for
practitioners in order to understand and interpret the
phenomena occurring in sequential data. Currently,
motifs are searched among series sub-sequences, aiming
at selecting the most frequently occurring ones.
Search-based methods, which try out series sub-sequences
as motif candidates, are currently believed to be the
best methods in finding the most frequent patterns.
However, this paper proposes an entirely new
perspective in finding motifs. We demonstrate that
searching is non-optimal since the domain of motifs is
restricted, and instead we propose a principled
optimization approach able to find optimal motifs. We
treat the occurrence frequency as a function and
time-series motifs as its parameters, therefore we
learn the optimal motifs that maximize the frequency
function. In contrast to searching, our method is able
to discover the most repetitive patterns (hence
optimal), even in cases where they do not explicitly
occur as sub-sequences. Experiments on several
real-life time-series datasets show that the motifs
found by our method are highly more frequent than the
ones found through searching, for exactly the same
distance threshold.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2016:SNE,
  author =       "Xianchao Zhang and Linlin Zong and Quanzeng You and
                 Xing Yong",
  title =        "Sampling for {Nystr{\"o}m} Extension-Based Spectral
                 Clustering: Incremental Perspective and Novel
                 Analysis",
  journal =      j-TKDD,
  volume =       "11",
  number =       "1",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2934693",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:30 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Sampling is the key aspect for Nystr{\"o}m extension
                 based spectral clustering. Traditional sampling schemes
                 select the set of landmark points on a whole and focus
                 on how to lower the matrix approximation error.
                 However, the matrix approximation error does not have
                 direct impact on the clustering performance. In this
                 article, we propose a sampling framework from an
                 incremental perspective, i.e., the landmark points are
                 selected one by one, and each next point to be sampled
                 is determined by previously selected landmark points.
                 Incremental sampling builds explicit relationships
                 among landmark points; thus, they work together well
                 and provide a theoretical guarantee on the clustering
                 performance. We provide two novel analysis methods and
                 propose two schemes for selecting-the-next-one of the
                 framework. The first scheme is based on clusterability
                 analysis, which provides a better guarantee on
                 clustering performance than schemes based on matrix
                 approximation error analysis. The second scheme is
                 based on loss analysis, which provides maximized
                 predictive ability of the landmark points on the
                 (implicit) labels of the unsampled points. Experimental
                 results on a wide range of benchmark datasets
                 demonstrate the superiorities of our proposed
                 incremental sampling schemes over existing sampling
                 schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Qiao:2016:FST,
  author =       "Maoying Qiao and Richard Yi Da Xu and Wei Bian and
                 Dacheng Tao",
  title =        "Fast Sampling for Time-Varying Determinantal Point
                 Processes",
  journal =      j-TKDD,
  volume =       "11",
  number =       "1",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2943785",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:30 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Determinantal Point Processes (DPPs) are stochastic
                 models which assign each subset of a base dataset with
                 a probability proportional to the subset's degree of
                 diversity. It has been shown that DPPs are particularly
                 appropriate in data subset selection and summarization
                 (e.g., news display, video summarizations). DPPs prefer
                 diverse subsets while other conventional models cannot
                 offer. However, DPPs inference algorithms have a
                 polynomial time complexity which makes it difficult to
                 handle large and time-varying datasets, especially when
                 real-time processing is required. To address this
                 limitation, we developed a fast sampling algorithm for
                 DPPs which takes advantage of the nature of some
                 time-varying data (e.g., news corpora updating,
                 communication network evolving), where the data changes
                 between time stamps are relatively small. The proposed
                 algorithm is built upon the simplification of marginal
                 density functions over successive time stamps and the
                 sequential Monte Carlo (SMC) sampling technique.
                 Evaluations on both a real-world news dataset and the
                 Enron Corpus confirm the efficiency of the proposed
                 algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Crescenzi:2016:GIO,
  author =       "Pierluigi Crescenzi and Gianlorenzo D'Angelo and
                 Lorenzo Severini and Yllka Velaj",
  title =        "Greedily Improving Our Own Closeness Centrality in a
                 Network",
  journal =      j-TKDD,
  volume =       "11",
  number =       "1",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2953882",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:30 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The closeness centrality is a well-known measure of
                 importance of a vertex within a given complex network.
                 Having high closeness centrality can have positive
                 impact on the vertex itself: hence, in this paper we
                 consider the optimization problem of determining how
                 much a vertex can increase its centrality by creating a
                 limited amount of new edges incident to it. We will
                 consider both the undirected and the directed graph
                 cases. In both cases, we first prove that the
                 optimization problem does not admit a polynomial-time
                 approximation scheme (unless P = NP), and then propose
                 a greedy approximation algorithm (with an almost tight
                 approximation ratio), whose performance is then tested
                 on synthetic graphs and real-world networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2016:CBN,
  author =       "Xiang Li and Charles X. Ling and Huaimin Wang",
  title =        "The Convergence Behavior of Naive {Bayes} on Large
                 Sparse Datasets",
  journal =      j-TKDD,
  volume =       "11",
  number =       "1",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2948068",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:30 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Large and sparse datasets with a lot of missing values
                 are common in the big data era, such as user behaviors
                 over a large number of items. Classification in such
                 datasets is an important topic for machine learning and
                 data mining. Practically, naive Bayes is still a
                 popular classification algorithm for large sparse
                 datasets, as its time and space complexity scales
                 linearly with the size of non-missing values. However,
                 several important questions about the behavior of naive
                 Bayes are yet to be answered. For example, how
                 different mechanisms of data missing, data sparsity,
                 and the number of attributes systematically affect the
                 learning curves and convergence? In this paper, we
                 address several common data missing mechanisms and
                 propose novel data generation methods based on these
                 mechanisms. We generate large and sparse data
                 systematically, and study the entire AUC (Area Under
                 ROC Curve) learning curve and convergence behavior of
                 naive Bayes. We not only have several important
                 experiment observations, but also provide detailed
                 theoretic studies. Finally, we summarize our empirical
                 and theoretic results as an intuitive decision
                 flowchart and a useful guideline for classifying large
                 sparse datasets in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Fu:2016:MGD,
  author =       "Yanjie Fu and Hui Xiong and Yong Ge and Yu Zheng and
                 Zijun Yao and Zhi-Hua Zhou",
  title =        "Modeling of Geographic Dependencies for Real Estate
                 Ranking",
  journal =      j-TKDD,
  volume =       "11",
  number =       "1",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2934692",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:30 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "It is traditionally a challenge for home buyers to
                 understand, compare, and contrast the investment value
                 of real estate. Although a number of appraisal methods
                 have been developed to value real properties, the
                 performances of these methods have been limited by
                 traditional data sources for real estate appraisal.
                 With the development of new ways of collecting
                 estate-related mobile data, there is a potential to
                 leverage geographic dependencies of real estate for
                 enhancing real estate appraisal. Indeed, the geographic
                 dependencies of the investment value of an estate can
                 be from the characteristics of its own neighborhood
                 (individual), the values of its nearby estates (peer),
                 and the prosperity of the affiliated latent business
                 area (zone). To this end, in this paper, we propose a
                 geographic method, named ClusRanking, for real estate
                 appraisal by leveraging the mutual enforcement of
                 ranking and clustering power. ClusRanking is able to
                 exploit geographic individual, peer, and zone
                 dependencies in a probabilistic ranking model.
                 Specifically, we first extract the geographic utility
                 of estates from geography data, estimate the
                 neighborhood popularity of estates by mining taxicab
                 trajectory data, and model the influence of latent
                 business areas. Also, we fuse these three influential
                 factors and predict real estate investment value.
                 Moreover, we simultaneously consider individual, peer
                 and zone dependencies, and derive an estate-specific
                 ranking likelihood as the objective function.
                 Furthermore, we propose an improved method named
                 CR-ClusRanking by incorporating checkin information as
                 a regularization term which reduces the performance
                 volatility of real estate ranking system. Finally, we
                 conduct a comprehensive evaluation with the real
                 estate-related data of Beijing, and the experimental
                 results demonstrate the effectiveness of our proposed
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Gao:2016:DAC,
  author =       "Zekai J. Gao and Chris Jermaine",
  title =        "Distributed Algorithms for Computing Very Large
                 Thresholded Covariance Matrices",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2935750",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Computation of covariance matrices from observed data
                 is an important problem, as such matrices are used in
                 applications such as principal component analysis
                 (PCA), linear discriminant analysis (LDA), and
                 increasingly in the learning and application of
                 probabilistic graphical models. However, computing an
                 empirical covariance matrix is not always an easy
                 problem. There are two key difficulties associated with
                 computing such a matrix from a very high-dimensional
                 dataset. The first problem is over-fitting. For a
                 $p$-dimensional covariance matrix, there are $ p(p - 1)
                 / 2$ unique, off-diagonal entries in the empirical
                 covariance matrix $S$; for large $p$ (say, $ p > 10^5$),
                 the size $n$ of the dataset is often much smaller than
                 the number of covariances to compute. Over-fitting is a
                 concern in any situation in which the number of
                 parameters learned can greatly exceed the size of the
                 dataset. Thus, there are strong theoretical reasons to
                 expect that for high-dimensional data---even Gaussian
                 data---the empirical covariance matrix is not a good
                 estimate for the true covariance matrix underlying the
                 generative process. The second problem is
                 computational. Computing a covariance matrix takes $
                 O(n p^2)$ time. For large $p$ (greater than 10,000) and
                 $n$ much greater than $p$, this is debilitating. In
                 this article, we consider how both of these
                 difficulties can be handled simultaneously.
                 Specifically, a key regularization technique for
                 high-dimensional covariance estimation is thresholding,
                 in which the smallest or least significant entries in
                 the covariance matrix are simply dropped and replaced
                 with the value $0$. This suggests an obvious way to
                 address the computational difficulty as well: First,
                 compute the identities of the $K$ entries in the
                 covariance matrix that are actually important in the
                 sense that they will not be removed during
                 thresholding, and then in a second step, compute the
                 values of those entries. This can be done in $ O(K n)$
                 time. If $ K \ll p^2$ and the identities of the
                 important entries can be computed in reasonable time,
                 then this is a big win. The key technical contribution
                 of this article is the design and implementation of two
                 different distributed algorithms for approximating the
                 identities of the important entries quickly, using
                 sampling. We have implemented these methods and tested
                 them using an 800-core compute cluster. Experiments
                 have been run using real datasets having millions of
                 data points and up to 40,000 dimensions. These
                 experiments show that the proposed methods are both
                 accurate and efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2016:WKI,
  author =       "Chenguang Wang and Yangqiu Song and Dan Roth and Ming
                 Zhang and Jiawei Han",
  title =        "World Knowledge as Indirect Supervision for Document
                 Clustering",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2953881",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "One of the key obstacles in making learning protocols
                 realistic in applications is the need to supervise
                 them, a costly process that often requires hiring
                 domain experts. We consider the framework to use the
                 world knowledge as indirect supervision. World
                 knowledge is general-purpose knowledge, which is not
                 designed for any specific domain. Then, the key
                 challenges are how to adapt the world knowledge to
                 domains and how to represent it for learning. In this
                 article, we provide an example of using world knowledge
                 for domain-dependent document clustering. We provide
                 three ways to specify the world knowledge to domains by
                 resolving the ambiguity of the entities and their
                 types, and represent the data with world knowledge as a
                 heterogeneous information network. Then, we propose a
                 clustering algorithm that can cluster multiple types
                 and incorporate the sub-type information as
                 constraints. In the experiments, we use two existing
                 knowledge bases as our sources of world knowledge. One
                 is Freebase, which is collaboratively collected
                 knowledge about entities and their organizations. The
                 other is YAGO2, a knowledge base automatically
                 extracted from Wikipedia and maps knowledge to the
                 linguistic knowledge base, WordNet. Experimental
                 results on two text benchmark datasets (20newsgroups
                 and RCV1) show that incorporating world knowledge as
                 indirect supervision can significantly outperform the
                 state-of-the-art clustering algorithms as well as
                 clustering algorithms enhanced with world knowledge
                 features. A preliminary version of this work appeared
                 in the proceedings of KDD 2015 [Wang et al. 2015a].
                 This journal version has made several major
                 improvements. First, we have proposed a new and general
                 learning framework for machine learning with world
                 knowledge as indirect supervision, where document
                 clustering is a special case in the original paper.
                 Second, in order to make our unsupervised semantic
                 parsing method more understandable, we add several real
                 cases from the original sentences to the resulting
                 logic forms with all the necessary information. Third,
                 we add details of the three semantic filtering methods
                 and conduct deep analysis of the three semantic
                 filters, by using case studies to show why the
                 conceptualization-based semantic filter can produce
                 more accurate indirect supervision. Finally, in
                 addition to the experiment on 20 newsgroup data and
                 Freebase, we have extended the experiments on
                 clustering results by using all the combinations of
                 text (20 newsgroup, MCAT, CCAT, ECAT) and world
                 knowledge sources (Freebase, YAGO2).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chakraborty:2016:PCS,
  author =       "Tanmoy Chakraborty and Sriram Srinivasan and Niloy
                 Ganguly and Animesh Mukherjee and Sanjukta Bhowmick",
  title =        "Permanence and Community Structure in Complex
                 Networks",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2953883",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The goal of community detection algorithms is to
                 identify densely connected units within large networks.
                 An implicit assumption is that all the constituent
                 nodes belong equally to their associated community.
                 However, some nodes are more important in the community
                 than others. To date, efforts have been primarily made
                 to identify communities as a whole, rather than
                 understanding to what extent an individual node belongs
                 to its community. Therefore, most metrics for
                 evaluating communities, for example modularity, are
                 global. These metrics produce a score for each
                 community, not for each individual node. In this
                 article, we argue that the belongingness of nodes in a
                 community is not uniform. We quantify the degree of
                 belongingness of a vertex within a community by a new
                 vertex-based metric called permanence. The central idea
                 of permanence is based on the observation that the
                 strength of membership of a vertex to a community
                 depends upon two factors (i) the extent of connections
                 of the vertex within its community versus outside its
                 community, and (ii) how tightly the vertex is connected
                 internally. We present the formulation of permanence
                 based on these two quantities. We demonstrate that
                 compared to other existing metrics (such as modularity,
                 conductance, and cut-ratio), the change in permanence
                 is more commensurate to the level of perturbation in
                 ground-truth communities. We discuss how permanence can
                 help us understand and utilize the structure and
                 evolution of communities by demonstrating that it can
                 be used to --- (i) measure the persistence of a vertex
                 in a community, (ii) design strategies to strengthen
                 the community structure, (iii) explore the
                 core-periphery structure within a community, and (iv)
                 select suitable initiators for message spreading. We
                 further show that permanence is an excellent metric for
                 identifying communities. We demonstrate that the
                 process of maximizing permanence (abbreviated as
                 MaxPerm) produces meaningful communities that concur
                 with the ground-truth community structure of the
                 networks more accurately than eight other popular
                 community detection algorithms. Finally, we provide
                 mathematical proofs to demonstrate the correctness of
                 finding communities by maximizing permanence. In
                 particular, we show that the communities obtained by
                 this method are (i) less affected by the changes in
                 vertex ordering, and (ii) more resilient to resolution
                 limit, degeneracy of solutions, and asymptotic growth
                 of values.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Smith:2016:PNN,
  author =       "Laura M. Smith and Linhong Zhu and Kristina Lerman and
                 Allon G. Percus",
  title =        "Partitioning Networks with Node Attributes by
                 Compressing Information Flow",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2968451",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Real-world networks are often organized as modules or
                 communities of similar nodes that serve as functional
                 units. These networks are also rich in content, with
                 nodes having distinguished features or attributes. In
                 order to discover a network's modular structure, it is
                 necessary to take into account not only its links but
                 also node attributes. We describe an
                 information-theoretic method that identifies modules by
                 compressing descriptions of information flow on a
                 network. Our formulation introduces node content into
                 the description of information flow, which we then
                 minimize to discover groups of nodes with similar
                 attributes that also tend to trap the flow of
                 information. The method is conceptually simple and does
                 not require ad-hoc parameters to specify the number of
                 modules or to control the relative contribution of
                 links and node attributes to network structure. We
                 apply the proposed method to partition real-world
                 networks with known community structure. We demonstrate
                 that adding node attributes helps recover the
                 underlying community structure in content-rich networks
                 more effectively than using links alone. In addition,
                 we show that our method is faster and more accurate
                 than alternative state-of-the-art algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2016:SAO,
  author =       "Kui Yu and Xindong Wu and Wei Ding and Jian Pei",
  title =        "Scalable and Accurate Online Feature Selection for Big
                 Data",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2976744",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Feature selection is important in many big data
                 applications. Two critical challenges closely associate
                 with big data. First, in many big data applications,
                 the dimensionality is extremely high, in millions, and
                 keeps growing. Second, big data applications call for
                 highly scalable feature selection algorithms in an
                 online manner such that each feature can be processed
                 in a sequential scan. We present SAOLA, a {Scalable and
                 Accurate On Line Approach} for feature selection in
                 this paper. With a theoretical analysis on bounds of
                 the pairwise correlations between features, SAOLA
                 employs novel pairwise comparison techniques and
                 maintains a parsimonious model over time in an online
                 manner. Furthermore, to deal with upcoming features
                 that arrive by groups, we extend the SAOLA algorithm,
                 and then propose a new group-SAOLA algorithm for online
                 group feature selection. The group-SAOLA algorithm can
                 online maintain a set of feature groups that is sparse
                 at the levels of both groups and individual features
                 simultaneously. An empirical study using a series of
                 benchmark real datasets shows that our two algorithms,
                 SAOLA and group-SAOLA, are scalable on datasets of
                 extremely high dimensionality and have superior
                 performance over the state-of-the-art feature selection
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2016:SAU,
  author =       "Bin Liu and Yao Wu and Neil Zhenqiang Gong and Junjie
                 Wu and Hui Xiong and Martin Ester",
  title =        "Structural Analysis of User Choices for Mobile App
                 Recommendation",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983533",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Advances in smartphone technology have promoted the
                 rapid development of mobile apps. However, the
                 availability of a huge number of mobile apps in
                 application stores has imposed the challenge of finding
                 the right apps to meet the user needs. Indeed, there is
                 a critical demand for personalized app recommendations.
                 Along this line, there are opportunities and challenges
                 posed by two unique characteristics of mobile apps.
                 First, app markets have organized apps in a
                 hierarchical taxonomy. Second, apps with similar
                 functionalities are competing with each other. Although
                 there are a variety of approaches for mobile app
                 recommendations, these approaches do not have a focus
                 on dealing with these opportunities and challenges. To
                 this end, in this article, we provide a systematic
                 study for addressing these challenges. Specifically, we
                 develop a structural user choice model (SUCM) to learn
                 fine-grained user preferences by exploiting the
                 hierarchical taxonomy of apps as well as the
                 competitive relationships among apps. Moreover, we
                 design an efficient learning algorithm to estimate the
                 parameters for the SUCM model. Finally, we perform
                 extensive experiments on a large app adoption dataset
                 collected from Google Play. The results show that SUCM
                 consistently outperforms state-of-the-art Top-N
                 recommendation methods by a significant margin.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Afrati:2016:APD,
  author =       "Foto Afrati and Shlomi Dolev and Ephraim Korach and
                 Shantanu Sharma and Jeffrey D. Ullman",
  title =        "Assignment Problems of Different-Sized Inputs in
                 {MapReduce}",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2987376",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "A MapReduce algorithm can be described by a mapping
                 schema, which assigns inputs to a set of reducers, such
                 that for each required output there exists a reducer
                 that receives all the inputs participating in the
                 computation of this output. Reducers have a capacity
                 that limits the sets of inputs they can be assigned.
                 However, individual inputs may vary in terms of size.
                 We consider, for the first time, mapping schemas where
                 input sizes are part of the considerations and
                 restrictions. One of the significant parameters to
                 optimize in any MapReduce job is communication cost
                 between the map and reduce phases. The communication
                 cost can be optimized by minimizing the number of
                 copies of inputs sent to the reducers. The
                 communication cost is closely related to the number of
                 reducers of constrained capacity that are used to
                 accommodate appropriately the inputs, so that the
                 requirement of how the inputs must meet in a reducer is
                 satisfied. In this work, we consider a family of
                 problems where it is required that each input meets
                 with each other input in at least one reducer. We also
                 consider a slightly different family of problems in
                 which each input of a list, X, is required to meet each
                 input of another list, Y, in at least one reducer. We
                 prove that finding an optimal mapping schema for these
                 families of problems is NP-hard, and present a
                 bin-packing-based approximation algorithm for finding a
                 near optimal mapping schema.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2016:UHM,
  author =       "Zhongyuan Wang and Fang Wang and Haixun Wang and
                 Zhirui Hu and Jun Yan and Fangtao Li and Ji-Rong Wen
                 and Zhoujun Li",
  title =        "Unsupervised Head-Modifier Detection in Search
                 Queries",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "19:1--19:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2988235",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Interpreting the user intent in search queries is a
                 key task in query understanding. Query intent
                 classification has been widely studied. In this
                 article, we go one step further to understand the query
                 from the view of head-modifier analysis. For example,
                 given the query ``popular iphone 5 smart cover,''
                 instead of using coarse-grained semantic classes (e.g.,
                 find electronic product), we interpret that ``smart
                 cover'' is the head or the intent of the query and
                 ``iphone 5'' is its modifier. Query head-modifier
                 detection can help search engines to obtain
                 particularly relevant content, which is also important
                 for applications such as ads matching and query
                 recommendation. We introduce an unsupervised semantic
                 approach for query head-modifier detection. First, we
                 mine a large number of instance level head-modifier
                 pairs from search log. Then, we develop a
                 conceptualization mechanism to generalize the instance
                 level pairs to concept level. Finally, we derive
                 weighted concept patterns that are concise, accurate,
                 and have strong generalization power in head-modifier
                 detection. The developed mechanism has been used in
                 production for search relevance and ads matching. We
                 use extensive experiment results to demonstrate the
                 effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chang:2016:LMB,
  author =       "Yi Chang and Makoto Yamada and Antonio Ortega and Yan
                 Liu",
  title =        "Lifecycle Modeling for Buzz Temporal Pattern
                 Discovery",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "20:1--20:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2994605",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In social media analysis, one critical task is
                 detecting a burst of topics or buzz, which is reflected
                 by extremely frequent mentions of certain keywords in a
                 short-time interval. Detecting buzz not only provides
                 useful insights into the information propagation
                 mechanism, but also plays an essential role in
                 preventing malicious rumors. However, buzz modeling is
                 a challenging task because a buzz time-series often
                 exhibits sudden spikes and heavy tails, wherein most
                 existing time-series models fail. In this article, we
                 propose novel buzz modeling approaches that capture the
                 rise and fade temporal patterns via Product Lifecycle
                 (PLC) model, a classical concept in economics. More
                 specifically, we propose to model multiple peaks in
                 buzz time-series with PLC mixture or PLC group mixture
                 and develop a probabilistic graphical model (K-Mixture
                 of Product Lifecycle) (K-MPLC) to automatically
                 discover inherent lifecycle patterns within a
                 collection of buzzes. Furthermore, we effectively
                 utilize the model parameters of PLC mixture or PLC
                 group mixture for burst prediction. Our experimental
                 results show that our proposed methods significantly
                 outperform existing leading approaches on buzz
                 clustering and buzz-type prediction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
%%% TKDD volume 11, number 2 (December 2016), article 21.
@Article{Wei:2016:NBG,
  author          = {Wei, Qiang and Qiao, Dandan and Zhang, Jin and
                     Chen, Guoqing and Guo, Xunhua},
  title           = {A Novel Bipartite Graph Based Competitiveness Degree
                     Analysis from Query Logs},
  journal         = j-TKDD,
  volume          = {11},
  number          = {2},
  pages           = {21:1--21:??},
  month           = dec,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2996196},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Dec 26 17:17:00 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Competitiveness degree analysis is a focal point of
                     business strategy and competitive intelligence, aimed
                     to help managers closely monitor to what extent their
                     rivals are competing with them. This article proposes a
                     novel method, namely BCQ, to measure the
                     competitiveness degree between peers from query logs as
                     an important form of user generated contents, which
                     reflects the ``wisdom of crowds'' from the search
                     engine users' perspective. In doing so, a bipartite
                     graph model is developed to capture the competitive
                     relationships through conjoint attributes hidden in
                     query logs, where the notion of competitiveness degree
                     for entity pairs is introduced, and then used to
                     identify the competitive paths mapped in the bipartite
                     graph. Subsequently, extensive experiments are
                     conducted to demonstrate the effectiveness of BCQ to
                     quantify the competitiveness degrees. Experimental
                     results reveal that BCQ can well support competitors
                     ranking, which is helpful for devising competitive
                     strategies and pursuing market performance. In
                     addition, efficiency experiments on synthetic data show
                     a good scalability of BCQ on large scale of query
                     logs.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {21},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 2 (December 2016), article 22.
%%% The subtitle's first word is capitalized so that the title is in
%%% consistent Title Case; styles can downcase but cannot restore capitals.
@Article{Pei:2016:CCP,
  author          = {Pei, Yuanli and Fern, Xiaoli Z. and
                     Tjahja, Teresa Vania and Rosales, R{\'o}mer},
  title           = {Comparing Clustering with Pairwise and Relative
                     Constraints: A Unified Framework},
  journal         = j-TKDD,
  volume          = {11},
  number          = {2},
  pages           = {22:1--22:??},
  month           = dec,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2996467},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Dec 26 17:17:00 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Clustering can be improved with the help of side
                     information about the similarity relationships among
                     instances. Such information has been commonly
                     represented by two types of constraints: pairwise
                     constraints and relative constraints, regarding
                     similarities about instance pairs and triplets,
                     respectively. Prior work has mostly considered these
                     two types of constraints separately and developed
                     individual algorithms to learn from each type. In
                     practice, however, it is critical to understand/compare
                     the usefulness of the two types of constraints as well
                     as the cost of acquiring them, which has not been
                     studied before. This paper provides an extensive
                     comparison of clustering with these two types of
                     constraints. Specifically, we compare their impacts
                     both on human users that provide such constraints and
                     on the learning system that incorporates such
                     constraints into clustering. In addition, to ensure
                     that the comparison of clustering is performed on equal
                     ground (without the potential bias introduced by
                     different learning algorithms), we propose a
                     probabilistic semi-supervised clustering framework that
                     can learn from either type of constraints. Our
                     experiments demonstrate that the proposed
                     semi-supervised clustering framework is highly
                     effective at utilizing both types of constraints to aid
                     clustering. Our user study provides valuable insights
                     regarding the impact of the constraints on human users,
                     and our experiments on clustering with the
                     human-labeled constraints reveal that relative
                     constraint is often more efficient at improving
                     clustering.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {22},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 2 (December 2016), article 23.
@Article{Lorenzetti:2016:MTS,
  author          = {Lorenzetti, Carlos and Maguitman, Ana and
                     Leake, David and Menczer, Filippo and
                     Reichherzer, Thomas},
  title           = {Mining for Topics to Suggest Knowledge Model
                     Extensions},
  journal         = j-TKDD,
  volume          = {11},
  number          = {2},
  pages           = {23:1--23:??},
  month           = dec,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2997657},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Dec 26 17:17:00 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Electronic concept maps, interlinked with other
                     concept maps and multimedia resources, can provide rich
                     knowledge models to capture and share human knowledge.
                     This article presents and evaluates methods to support
                     experts as they extend existing knowledge models, by
                     suggesting new context-relevant topics mined from Web
                     search engines. The task of generating topics to
                     support knowledge model extension raises two research
                     questions: first, how to extract topic descriptors and
                     discriminators from concept maps; and second, how to
                     use these topic descriptors and discriminators to
                     identify candidate topics on the Web with the right
                     balance of novelty and relevance. To address these
                     questions, this article first develops the theoretical
                     framework required for a ``topic suggester'' to aid
                     information search in the context of a knowledge model
                     under construction. It then presents and evaluates
                     algorithms based on this framework and applied in
                     Extender, an implemented tool for topic suggestion.
                     Extender has been developed and tested within
                     CmapTools, a widely used system for supporting
                     knowledge modeling using concept maps. However, the
                     generality of the algorithms makes them applicable to a
                     broad class of knowledge modeling systems, and to Web
                     search in general.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {23},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 2 (December 2016), article 24.
@Article{Kumar:2016:ACT,
  author          = {Kumar, Dheeraj and Bezdek, James C. and
                     Rajasegarar, Sutharshan and Palaniswami, Marimuthu and
                     Leckie, Christopher and Chan, Jeffrey and
                     Gubbi, Jayavardhana},
  title           = {Adaptive Cluster Tendency Visualization and Anomaly
                     Detection for Streaming Data},
  journal         = j-TKDD,
  volume          = {11},
  number          = {2},
  pages           = {24:1--24:??},
  month           = dec,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2997656},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Dec 26 17:17:00 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The growth in pervasive network infrastructure called
                     the Internet of Things (IoT) enables a wide range of
                     physical objects and environments to be monitored in
                     fine spatial and temporal detail. The detailed, dynamic
                     data that are collected in large quantities from sensor
                     devices provide the basis for a variety of
                     applications. Automatic interpretation of these
                     evolving large data is required for timely detection of
                     interesting events. This article develops and
                     exemplifies two new relatives of the visual assessment
                     of tendency (VAT) and improved visual assessment of
                     tendency (iVAT) models, which uses cluster heat maps to
                     visualize structure in static datasets. One new model
                     is initialized with a static VAT/iVAT image, and then
                     incrementally (hence inc-VAT/inc-iVAT) updates the
                     current minimal spanning tree (MST) used by VAT with an
                     efficient edge insertion scheme. Similarly,
                     dec-VAT/dec-iVAT efficiently removes a node from the
                     current VAT MST. A sequence of inc-iVAT/dec-iVAT images
                     can be used for (visual) anomaly detection in evolving
                     data streams and for sliding window based cluster
                     assessment for time series data. The method is
                     illustrated with four real datasets (three of them
                     being smart city IoT data). The evaluation demonstrates
                     the algorithms' ability to successfully isolate
                     anomalies and visualize changing cluster structure in
                     the streaming data.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {24},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 2 (December 2016), article 25.
%%% The seed-set size k in the abstract is a mathematical variable and is
%%% therefore set in math mode.
@Article{Zhu:2016:EVM,
  author          = {Zhu, Wen-Yuan and Peng, Wen-Chih and Chen, Ling-Jyh and
                     Zheng, Kai and Zhou, Xiaofang},
  title           = {Exploiting Viral Marketing for Location Promotion in
                     Location-Based Social Networks},
  journal         = j-TKDD,
  volume          = {11},
  number          = {2},
  pages           = {25:1--25:??},
  month           = dec,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3001938},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Dec 26 17:17:00 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {With the explosion of smartphones and social network
                     services, location-based social networks (LBSNs) are
                     increasingly seen as tools for businesses (e.g.,
                     restaurants and hotels) to promote their products and
                     services. In this article, we investigate the key
                     techniques that can help businesses promote their
                     locations by advertising wisely through the underlying
                     LBSNs. In order to maximize the benefit of location
                     promotion, we formalize it as an influence maximization
                     problem in an LBSN, i.e., given a target location and
                     an LBSN, a set of $k$ users (called seeds) should be
                     advertised initially such that they can successfully
                     propagate and attract many other users to visit the
                     target location. Existing studies have proposed
                     different ways to calculate the information propagation
                     probability, that is, how likely it is that a user may
                     influence another, in the setting of a static social
                     network. However, it is more challenging to derive the
                     propagation probability in an LBSN since it is heavily
                     affected by the target location and the user mobility,
                     both of which are dynamic and query dependent. This
                     article proposes two user mobility models, namely the
                     Gaussian-based and distance-based mobility models, to
                     capture the check-in behavior of individual LBSN users,
                     based on which location-aware propagation probabilities
                     can be derived. Extensive experiments based on two real
                     LBSN datasets have demonstrated the superior
                     effectiveness of our proposals compared with existing
                     static models of propagation probabilities to truly
                     reflect the information propagation in LBSNs.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {25},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 26.
@Article{Sariyuce:2017:GMF,
  author          = {Sariy{\"u}ce, Ahmet Erdem and Kaya, Kamer and
                     Saule, Erik and {\c{C}}ataly{\"u}rek, {\"U}mit V.},
  title           = {Graph Manipulations for Fast Centrality Computation},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {26:1--26:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3022668},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The betweenness and closeness metrics are widely used
                     metrics in many network analysis applications. Yet,
                     they are expensive to compute. For that reason, making
                     the betweenness and closeness centrality computations
                     faster is an important and well-studied problem. In
                     this work, we propose the framework BADIOS that
                     manipulates the graph by compressing it and splitting
                     into pieces so that the centrality computation can be
                     handled independently for each piece. Experimental
                     results show that the proposed techniques can be a
                     great arsenal to reduce the centrality computation time
                     for various types and sizes of networks. In particular,
                     it reduces the betweenness centrality computation time
                     of a 4.6 million edges graph from more than 5 days to
                     less than 16 hours. For the same graph, the closeness
                     computation time is decreased from more than 3 days to
                     6 hours (12.7x speedup).},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {26},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 27.
%%% In the abstract, the complexity class is written as the single compound
%%% NP-hard, and the threshold k is set in math mode.
@Article{Rozenshtein:2017:FDD,
  author          = {Rozenshtein, Polina and Tatti, Nikolaj and
                     Gionis, Aristides},
  title           = {Finding Dynamic Dense Subgraphs},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {27:1--27:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3046791},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Online social networks are often defined by
                     considering interactions of entities at an aggregate
                     level. For example, a call graph is formed among
                     individuals who have called each other at least once;
                     or at least $k$ times. Similarly, in social-media
                     platforms, we consider implicit social networks among
                     users who have interacted in some way, e.g., have made
                     a conversation, have commented to the content of each
                     other, and so on. Such definitions have been used
                     widely in the literature and they have offered
                     significant insights regarding the structure of social
                     networks. However, it is obvious that they suffer from
                     a severe limitation: They neglect the precise time that
                     interactions among the network entities occur. In this
                     article, we consider interaction networks, where the
                     data description contains not only information about
                     the underlying topology of the social network, but also
                     the exact time instances that network entities
                     interact. In an interaction network, an edge is
                     associated with a timestamp, and multiple edges may
                     occur for the same pair of entities. Consequently,
                     interaction networks offer a more fine-grained
                     representation, which can be leveraged to reveal
                     otherwise hidden dynamic phenomena. In the setting of
                     interaction networks, we study the problem of
                     discovering dynamic dense subgraphs whose edges occur
                     in short time intervals. We view such subgraphs as
                     fingerprints of dynamic activity occurring within
                     network communities. Such communities represent groups
                     of individuals who interact with each other in specific
                     time instances, for example, a group of employees who
                     work on a project and whose interaction intensifies
                     before certain project milestones. We prove that the
                     problem we define is NP-hard, and we provide efficient
                     algorithms by adapting techniques for finding dense
                     subgraphs. We also show how to speed-up the proposed
                     methods by exploiting concavity properties of our
                     objective function and by the means of fractional
                     programming. We perform extensive evaluation of the
                     proposed methods on synthetic and real datasets, which
                     demonstrates the validity of our approach and shows
                     that our algorithms can be used to obtain high-quality
                     results.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {27},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 28.
@Article{Liu:2017:MBM,
  author          = {Liu, Guannan and Fu, Yanjie and Chen, Guoqing and
                     Xiong, Hui and Chen, Can},
  title           = {Modeling Buying Motives for Personalized Product
                     Bundle Recommendation},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {28:1--28:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3022185},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Product bundling is a marketing strategy that offers
                     several products/items for sale as one bundle. While
                     the bundling strategy has been widely used, less
                     efforts have been made to understand how items should
                     be bundled with respect to consumers' preferences and
                     buying motives for product bundles. This article
                     investigates the relationships between the items that
                     are bought together within a product bundle. To that
                     end, each purchased product bundle is formulated as a
                     bundle graph with items as nodes and the associations
                     between pairs of items in the bundle as edges. The
                     relationships between items can be analyzed by the
                     formation of edges in bundle graphs, which can be
                     attributed to the associations of feature aspects.
                     Then, a probabilistic model BPM (Bundle Purchases with
                     Motives) is proposed to capture the composition of each
                     bundle graph, with two latent factors node-type and
                     edge-type introduced to describe the feature aspects
                     and relationships respectively. Furthermore, based on
                     the preferences inferred from the model, an approach
                     for recommending items to form product bundles is
                     developed by estimating the probability that a consumer
                     would buy an associative item together with the item
                     already bought in the shopping cart. Finally,
                     experimental results on real-world transaction data
                     collected from well-known shopping sites show the
                     effectiveness advantages of the proposed approach over
                     other baseline methods. Moreover, the experiments also
                     show that the proposed model can explain consumers'
                     buying motives for product bundles in terms of
                     different node-types and edge-types.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {28},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 29.
@Article{Guo:2017:CSN,
  author          = {Guo, Ting and Wu, Jia and Zhu, Xingquan and
                     Zhang, Chengqi},
  title           = {Combining Structured Node Content and Topology
                     Information for Networked Graph Clustering},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {29:1--29:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2996197},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Graphs are popularly used to represent objects with
                     shared dependency relationships. To date, all existing
                     graph clustering algorithms consider each node as a
                     single attribute or a set of independent attributes,
                     without realizing that content inside each node may
                     also have complex structures. In this article, we
                     formulate a new networked graph clustering task where a
                     network contains a set of inter-connected (or
                     networked) super-nodes, each of which is a
                     single-attribute graph. The new super-node
                     representation is applicable to many real-world
                     applications, such as a citation network where each
                     node denotes a paper whose content can be described as
                     a graph, and citation relationships between papers form
                     a networked graph (i.e., a super-graph). Networked
                     graph clustering aims to find similar node groups, each
                     of which contains nodes with similar content and
                     structure information. The main challenge is to
                     properly calculate the similarity between super-nodes
                     for clustering. To solve the problem, we propose to
                     characterize node similarity by integrating structure
                     and content information of each super-node. To measure
                     node content similarity, we use cosine distance by
                     considering overlapped attributes between two
                     super-nodes. To measure structure similarity, we
                     propose an Attributed Random Walk Kernel (ARWK) to
                     calculate the similarity between super-nodes. Detailed
                     node content analysis is also included to build
                     relationships between super-nodes with shared internal
                     structure information, so the structure similarity can
                     be calculated in a precise way. By integrating the
                     structure similarity and content similarity as one
                     matrix, the spectral clustering is used to achieve
                     networked graph clustering. Our method enjoys sound
                     theoretical properties, including bounded similarities
                     and better structure similarity assessment than
                     traditional graph clustering methods. Experiments on
                     real-world applications demonstrate that our method
                     significantly outperforms baseline approaches.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {29},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 30.
@Article{Liu:2017:IPV,
  author          = {Liu, Qi and Xiang, Biao and Yuan, Nicholas Jing and
                     Chen, Enhong and Xiong, Hui and Zheng, Yi and
                     Yang, Yu},
  title           = {An Influence Propagation View of {PageRank}},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {30:1--30:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3046941},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {For a long time, PageRank has been widely used for
                     authority computation and has been adopted as a solid
                     baseline for evaluating social influence related
                     applications. However, when measuring the authority of
                     network nodes, the traditional PageRank method does not
                     take the nodes' prior knowledge into consideration.
                     Also, the connection between PageRank and social
                     influence modeling methods is not clearly established.
                     To that end, this article provides a focused study on
                     understanding PageRank as well as the relationship
                     between PageRank and social influence analysis. Along
                     this line, we first propose a linear social influence
                     model and reveal that this model generalizes the
                     PageRank-based authority computation by introducing
                     some constraints. Then, we show that the authority
                     computation by PageRank can be enhanced if exploiting
                     more reasonable constraints (e.g., from prior
                     knowledge). Next, to deal with the computational
                     challenge of linear model with general constraints, we
                     provide an upper bound for identifying nodes with top
                     authorities. Moreover, we extend the proposed linear
                     model for better measuring the authority of the given
                     node sets, and we also demonstrate the way to quickly
                     identify the top authoritative node sets. Finally,
                     extensive experimental evaluations on four real-world
                     networks validate the effectiveness of the proposed
                     linear model with respect to different constraint
                     settings. The results show that the methods with more
                     reasonable constraints can lead to better ranking and
                     recommendation performance. Meanwhile, the upper bounds
                     formed by PageRank values could be used to quickly
                     locate the nodes and node sets with the highest
                     authorities.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {30},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 31.
@Article{Wang:2017:LMD,
  author          = {Wang, Sen and Li, Xue and Chang, Xiaojun and
                     Yao, Lina and Sheng, Quan Z. and Long, Guodong},
  title           = {Learning Multiple Diagnosis Codes for {ICU} Patients
                     with Local Disease Correlation Mining},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {31:1--31:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3003729},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In the era of big data, a mechanism that can
                     automatically annotate disease codes to patients'
                     records in the medical information system is in demand.
                     The purpose of this work is to propose a framework that
                     automatically annotates the disease labels of
                     multi-source patient data in Intensive Care Units
                     (ICUs). We extract features from two main sources,
                     medical charts and notes. The Bag-of-Words model is
                     used to encode the features. Unlike most of the
                     existing multi-label learning algorithms that globally
                     consider correlations between diseases, our model
                     learns disease correlation locally in the patient data.
                     To achieve this, we derive a local disease correlation
                     representation to enrich the discriminant power of each
                     patient data. This representation is embedded into a
                     unified multi-label learning framework. We develop an
                     alternating algorithm to iteratively optimize the
                     objective function. Extensive experiments have been
                     conducted on a real-world ICU database. We have
                     compared our algorithm with representative multi-label
                     learning algorithms. Evaluation results have shown that
                     our proposed method has state-of-the-art performance in
                     the annotation of multiple diagnostic codes for ICU
                     patients. This study suggests that problems in the
                     automated diagnosis code annotation can be reliably
                     addressed by using a multi-label learning model that
                     exploits disease correlation. The findings of this
                     study will greatly benefit health care and management
                     in ICU considering that the automated diagnosis code
                     annotation can significantly improve the quality and
                     management of health care for both patients and
                     caregivers.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {31},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 32.
@Article{Bae:2017:SEF,
  author          = {Bae, Seung-Hee and Halperin, Daniel and
                     West, Jevin D. and Rosvall, Martin and Howe, Bill},
  title           = {Scalable and Efficient Flow-Based Community Detection
                     for Large-Scale Graph Analysis},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {32:1--32:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2992785},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Community detection is an increasingly popular
                     approach to uncover important structures in large
                     networks. Flow-based community detection methods rely
                     on communication patterns of the network rather than
                     structural properties to determine communities. The
                     Infomap algorithm in particular optimizes a novel
                     objective function called the map equation and has been
                     shown to outperform other approaches in third-party
                     benchmarks. However, Infomap and its variants are
                     inherently sequential, limiting their use for
                     large-scale graphs. In this article, we propose a novel
                     algorithm to optimize the map equation called RelaxMap.
                     RelaxMap provides two important improvements over
                     Infomap: parallelization, so that the map equation can
                     be optimized over much larger graphs, and
                     prioritization, so that the most important work occurs
                     first, iterations take less time, and the algorithm
                     converges faster. We implement these techniques using
                     OpenMP on shared-memory multicore systems, and evaluate
                     our approach on a variety of graphs from standard graph
                     clustering benchmarks as well as real graph datasets.
                     Our evaluation shows that both techniques are
                     effective: RelaxMap achieves 70\% parallel efficiency
                     on eight cores, and prioritization improves algorithm
                     performance by an additional 20--50\% on average,
                     depending on the graph properties. Additionally,
                     RelaxMap converges in the similar number of iterations
                     and provides solutions of equivalent quality as the
                     serial Infomap implementation.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {32},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 33.
@Article{Peng:2017:RGR,
  author          = {Peng, Chong and Kang, Zhao and Hu, Yunhong and
                     Cheng, Jie and Cheng, Qiang},
  title           = {Robust Graph Regularized Nonnegative Matrix
                     Factorization for Clustering},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {33:1--33:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3003730},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Matrix factorization is often used for data
                     representation in many data mining and machine-learning
                     problems. In particular, for a dataset without any
                     negative entries, nonnegative matrix factorization
                     (NMF) is often used to find a low-rank approximation by
                     the product of two nonnegative matrices. With reduced
                     dimensions, these matrices can be effectively used for
                     many applications such as clustering. The existing
                     methods of NMF are often afflicted with their
                     sensitivity to outliers and noise in the data. To
                     mitigate this drawback, in this paper, we consider
                     integrating NMF into a robust principal component
                     model, and design a robust formulation that effectively
                     captures noise and outliers in the approximation while
                     incorporating essential nonlinear structures. A set of
                     comprehensive empirical evaluations in clustering
                     applications demonstrates that the proposed method has
                     strong robustness to gross errors and superior
                     performance to current state-of-the-art methods.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {33},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
%%% TKDD volume 11, number 3 (April 2017), article 34.
@Article{Tang:2017:PSS,
  author          = {Tang, Xun and Alabduljalil, Maha and Jin, Xin and
                     Yang, Tao},
  title           = {Partitioned Similarity Search with Cache-Conscious
                     Data Traversal},
  journal         = j-TKDD,
  volume          = {11},
  number          = {3},
  pages           = {34:1--34:??},
  month           = apr,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3014060},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {All pairs similarity search (APSS) is used in many web
                     search and data mining applications. Previous work has
                     used techniques such as comparison filtering, inverted
                     indexing, and parallel accumulation of partial results.
                     However, shuffling intermediate results can incur
                     significant communication overhead as data scales up.
                     This paper studies a scalable two-phase approach called
                     Partition-based Similarity Search (PSS). The first
                     phase is to partition the data and group vectors that
                     are potentially similar. The second phase is to run a
                     set of tasks where each task compares a partition of
                     vectors with other candidate partitions. Due to data
                     sparsity and the presence of memory hierarchy,
                     accessing feature vectors during the partition
                     comparison phase incurs significant overhead. This
                     paper introduces a cache-conscious design for data
                     layout and traversal to reduce access time through
                     size-controlled data splitting and vector coalescing,
                     and it provides an analysis to guide the choice of
                     optimization parameters. The evaluation results show
                     that for the tested datasets, the proposed approach can
                     lead to an early elimination of unnecessary I/O and
                     data communication while sustaining parallel efficiency
                     with one order of magnitude of performance improvement
                     and it can also be integrated with LSH for approximated
                     APSS.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {34},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Feng:2017:RBC,
author = "Shanshan Feng and Jian Cao and Jie Wang and Shiyou
Qian",
title = "Recommendations Based on Comprehensively Exploiting
the Latent Factors Hidden in Items' Ratings and
Content",
journal = j-TKDD,
volume = "11",
number = "3",
pages = "35:1--35:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3003728",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 24 17:32:52 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "To improve the performance of recommender systems in a
practical manner, several hybrid approaches have been
developed by considering item ratings and content
information simultaneously. However, most of these
hybrid approaches make recommendations based on
aggregating different recommendation techniques using
various strategies, rather than considering joint
modeling of the item's ratings and content, and thus
fail to detect many latent factors that could
potentially improve the performance of the recommender
systems. For this reason, these approaches continue to
suffer from data sparsity and do not work well for
recommending items to individual users. A few studies
try to describe a user's preference by detecting items'
latent features from content-description texts as
compensation for the sparse ratings. Unfortunately,
most of these methods are still generally unable to
accomplish recommendation tasks well for two reasons:
(1) they learn latent factors from text descriptions or
user--item ratings independently, rather than combining
them together; and (2) influences of latent factors
hidden in texts and ratings are not fully explored. In
this study, we propose a probabilistic approach that we
denote as latent random walk (LRW) based on the
combination of an integrated latent topic model and
random walk (RW) with the restart method, which can be
used to rank items according to expected user
preferences by detecting both their explicit and
implicit correlative information, in order to recommend
top-ranked items to potentially interested users. As
presented in this article, the goal of this work is to
comprehensively discover latent factors hidden in
items' ratings and content in order to alleviate the
data sparsity problem and to improve the performance of
recommender systems. The proposed topic model provides
a generative probabilistic framework that discovers
users' implicit preferences and items' latent features
simultaneously by exploiting both ratings and item
content information. On the basis of this probabilistic
framework, RW can predict a user's preference for
unrated items by discovering global latent relations.
In order to show the efficiency of the proposed
approach, we test LRW and other state-of-the-art
methods on three real-world datasets, namely,
CAMRa2011, Yahoo!, and APP. The experiments indicate
that our approach outperforms all comparative methods
and, in addition, that it is less sensitive to the data
sparsity problem, thus demonstrating the robustness of
LRW for recommendation tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "35",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2017:SPM,
author = "Xutong Liu and Feng Chen and Yen-Cheng Lu and
Chang-Tien Lu",
title = "Spatial Prediction for Multivariate Non-{Gaussian}
Data",
journal = j-TKDD,
volume = "11",
number = "3",
pages = "36:1--36:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3022669",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 24 17:32:52 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "With the ever increasing volume of geo-referenced
datasets, there is a real need for better statistical
estimation and prediction techniques for spatial
analysis. Most existing approaches focus on predicting
multivariate Gaussian spatial processes, but as the
data may consist of non-Gaussian (or mixed type)
variables, this creates two challenges: (1) how to
accurately capture the dependencies among different
data types, both Gaussian and non-Gaussian; and (2) how
to efficiently predict multivariate non-Gaussian
spatial processes. In this article, we propose a
generic approach for predicting multiple response
variables of mixed types. The proposed approach
accurately captures cross-spatial dependencies among
response variables and reduces the computational burden
by projecting the spatial process to a lower
dimensional space with knot-based techniques. Efficient
approximations are provided to estimate posterior
marginals of latent variables for the predictive
process, and extensive experimental evaluations based
on both simulation and real-life datasets are provided
to demonstrate the effectiveness and efficiency of this
new approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "36",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2017:MDP,
author = "Liang Wang and Zhiwen Yu and Bin Guo and Tao Ku and
Fei Yi",
title = "Moving Destination Prediction Using Sparse Dataset: a
Mobility Gradient Descent Approach",
journal = j-TKDD,
volume = "11",
number = "3",
pages = "37:1--37:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3051128",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 24 17:32:52 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Moving destination prediction offers an important
category of location-based applications and provides
essential intelligence to business and governments. In
existing studies, a common approach to destination
prediction is to match the given query trajectory with
massive recorded trajectories by similarity
calculation. Unfortunately, due to privacy concerns,
budget constraints, and many other factors, in most
circumstances, we can only obtain a sparse trajectory
dataset. In sparse dataset, the available moving
trajectories are far from enough to cover all possible
query trajectories; thus the predictability of the
matching-based approach will decrease remarkably.
Toward destination prediction with sparse dataset,
instead of searching similar trajectories over the
sparse records, we alternatively examine the changes of
distances from sampling locations to final destination
on query trajectory. The underlying idea is intuitive:
It is directly motivated by travel purpose, people
always get closer to the final destination during the
movement. By borrowing the conception of gradient
descent in optimization theory, we propose a novel
moving destination prediction approach, namely MGDPre.
Building upon the mobility gradient descent, MGDPre
only investigates the behavior characteristics of query
trajectory itself without matching historical
trajectories, and thus is applicable for sparse
dataset. We evaluate our approach based on extensive
experiments, using GPS trajectories generated by a
sample of taxis over a 10-day period in Shenzhen city,
China. The results demonstrate that the effectiveness,
efficiency, and scalability of our approach outperform
state-of-the-art baseline methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "37",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Fountoulakis:2017:RRA,
author = "Kimon Fountoulakis and Abhisek Kundu and Eugenia-Maria
Kontopoulou and Petros Drineas",
title = "A Randomized Rounding Algorithm for Sparse {PCA}",
journal = j-TKDD,
volume = "11",
number = "3",
pages = "38:1--38:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3046948",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 24 17:32:52 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We present and analyze a simple, two-step algorithm to
approximate the optimal solution of the sparse PCA
problem. In the proposed approach, we first solve an $
l_1$-penalized version of the NP-hard sparse PCA
optimization problem and then we use a randomized
rounding strategy to sparsify the resulting dense
solution. Our main theoretical result guarantees an
additive error approximation and provides a tradeoff
between sparsity and accuracy. Extensive experimental
evaluation indicates that the proposed approach is
competitive in practice, even compared to
state-of-the-art toolboxes such as Spasm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "38",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Aggarwal:2017:ISI,
author = "Charu C. Aggarwal",
title = "Introduction to Special Issue on the Best Papers from
{KDD 2016}",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "39:1--39:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092689",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "This issue contains the best papers from the ACM KDD
Conference 2016. As is customary at KDD, special issue
papers are invited only from the research track. The
top-ranked papers from the KDD 2016 conference are
included in this issue. This issue contains a total of
six articles, which are from different areas of data
mining. A brief description of these articles is also
provided in this article.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "39",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cheng:2017:RCA,
author = "Wei Cheng and Jingchao Ni and Kai Zhang and Haifeng
Chen and Guofei Jiang and Yu Shi and Xiang Zhang and
Wei Wang",
title = "Ranking Causal Anomalies for System Fault Diagnosis
via Temporal and Dynamical Analysis on Vanishing
Correlations",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "40:1--40:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3046946",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Detecting system anomalies is an important problem in
many fields such as security, fault management, and
industrial optimization. Recently, invariant network
has been shown to be powerful in characterizing complex
system behaviours. In the invariant network, a node
represents a system component and an edge indicates a
stable, significant interaction between two components.
Structures and evolutions of the invariance network, in
particular the vanishing correlations, can shed
important light on locating causal anomalies and
performing diagnosis. However, existing approaches to
detect causal anomalies with the invariant network
often use the percentage of vanishing correlations to
rank possible causal components, which have several
limitations: (1) fault propagation in the network is
ignored, (2) the root causal anomalies may not always
be the nodes with a high percentage of vanishing
correlations, (3) temporal patterns of vanishing
correlations are not exploited for robust detection,
and (4) prior knowledge on anomalous nodes is not
exploited for (semi-)supervised detection. To address
these limitations, in this article we propose a network
diffusion based framework to identify significant
causal anomalies and rank them. Our approach can
effectively model fault propagation over the entire
invariant network and can perform joint inference on
both the structural and the time-evolving broken
invariance patterns. As a result, it can locate
high-confidence anomalies that are truly responsible
for the vanishing correlations and can compensate for
unstructured measurement noise in the system. Moreover,
when the prior knowledge on the anomalous status of
some nodes is available at certain time points, our
approach is able to leverage it to further enhance
the anomaly inference accuracy. When the prior
knowledge is noisy, our approach also automatically
learns reliable information and reduces impacts from
noises. By performing extensive experiments on
synthetic datasets, bank information system datasets,
and coal plant cyber-physical system datasets, we
demonstrate the effectiveness of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "40",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2017:CDM,
author = "Tianyang Zhang and Peng Cui and Christos Faloutsos and
Yunfei Lu and Hao Ye and Wenwu Zhu and Shiqiang Yang",
title = "{comeNgo}: a Dynamic Model for Social Group
Evolution",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "41:1--41:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3059214",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "How do social groups, such as Facebook groups and
Wechat groups, dynamically evolve over time? How do
people join the social groups, uniformly or with burst?
What is the pattern of people quitting from groups? Is
there a simple universal model to depict the
come-and-go patterns of various groups? In this
article, we examine temporal evolution patterns of more
than 100 thousand social groups with more than 10
million users. We surprisingly find that the evolution
patterns of real social groups go far beyond the
classic dynamic models like SI and SIR. For example, we
observe both diffusion and non-diffusion mechanism in
the group joining process, and power-law decay in group
quitting process, rather than exponential decay as
expected in SIR model. Therefore, we propose a new
model comeNgo, a concise yet flexible dynamic model for
group evolution. Our model has the following
advantages: (a) Unification power: it generalizes
earlier theoretical models and different joining and
quitting mechanisms we find from observation. (b)
Succinctness and interpretability: it contains only six
parameters with clear physical meanings. (c) Accuracy:
it can capture various kinds of group evolution
patterns precisely, and the goodness of fit increases
by 58\% over baseline. (d) Usefulness: it can be used
in multiple application scenarios, such as forecasting
and pattern discovery. Furthermore, our model can
provide insights about different evolution patterns of
social groups, and we also find that group structure
and its evolution has notable relations with temporal
patterns of group evolution.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "41",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2017:CDI,
author = "Chen Chen and Hanghang Tong and Lei Xie and Lei Ying
and Qing He",
title = "Cross-Dependency Inference in Multi-Layered Networks:
a Collaborative Filtering Perspective",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "42:1--42:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3056562",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The increasingly connected world has catalyzed the
fusion of networks from different domains, which
facilitates the emergence of a new network
model --- multi-layered networks. Examples of such kind of
network systems include critical infrastructure
networks, biological systems, organization-level
collaborations, cross-platform e-commerce, and so
forth. One crucial structure that distances
multi-layered network from other network models is its
cross-layer dependency, which describes the
associations between the nodes from different layers.
Needless to say, the cross-layer dependency in the
network plays an essential role in many data mining
applications like system robustness analysis and
complex network control. However, it remains a daunting
task to know the exact dependency relationships due to
noise, limited accessibility, and so forth. In this
article, we tackle the cross-layer dependency inference
problem by modeling it as a collective collaborative
filtering problem. Based on this idea, we propose an
effective algorithm Fascinate that can reveal
unobserved dependencies with linear complexity.
Moreover, we derive Fascinate-ZERO, an online variant
of Fascinate that can respond to a newly added node
timely by checking its neighborhood dependencies. We
perform extensive evaluations on real datasets to
substantiate the superiority of our proposed
approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "42",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{DeStefani:2017:TCL,
author = "Lorenzo {De Stefani} and Alessandro Epasto and Matteo
Riondato and Eli Upfal",
title = "{TRI{\`E}ST}: Counting Local and Global Triangles in
Fully Dynamic Streams with Fixed Memory Size",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "43:1--43:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3059194",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "``Ogni lassada xe persa.''$^1$ --- Proverb from
Trieste, Italy. We present TRI{\`E}ST, a suite of
one-pass streaming algorithms to compute unbiased,
low-variance, high-quality approximations of the global
and local (i.e., incident to each vertex) number of
triangles in a fully dynamic graph represented as an
adversarial stream of edge insertions and deletions.
Our algorithms use reservoir sampling and its variants
to exploit the user-specified memory space at all
times. This is in contrast with previous approaches,
which require hard-to-choose parameters (e.g., a fixed
sampling probability) and offer no guarantees on the
amount of memory they use. We analyze the variance of
the estimations and show novel concentration bounds for
these quantities. Our experimental results on very
large graphs demonstrate that TRI{\`E}ST outperforms
state-of-the-art approaches in accuracy and exhibits a
small update time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "43",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hooi:2017:GBF,
author = "Bryan Hooi and Kijung Shin and Hyun Ah Song and Alex
Beutel and Neil Shah and Christos Faloutsos",
title = "Graph-Based Fraud Detection in the Face of
Camouflage",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "44:1--44:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3056563",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a bipartite graph of users and the products that
they review, or followers and followees, how can we
detect fake reviews or follows? Existing fraud
detection methods (spectral, etc.) try to identify
dense subgraphs of nodes that are sparsely connected to
the remaining graph. Fraudsters can evade these methods
using camouflage, by adding reviews or follows with
honest targets so that they look ``normal.'' Even
worse, some fraudsters use hijacked accounts from
honest users, and then the camouflage is indeed
organic. Our focus is to spot fraudsters in the
presence of camouflage or hijacked accounts. We propose
FRAUDAR, an algorithm that (a) is camouflage resistant,
(b) provides upper bounds on the effectiveness of
fraudsters, and (c) is effective in real-world data.
Experimental results under various attacks show that
FRAUDAR outperforms the top competitor in accuracy of
detecting both camouflaged and non-camouflaged fraud.
Additionally, in real-world experiments with a Twitter
follower--followee graph of 1.47 billion edges, FRAUDAR
successfully detected a subgraph of more than 4,000
detected accounts, of which a majority had tweets
showing that they used follower-buying services.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "44",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Anderson:2017:AHE,
author = "Ashton Anderson and Jon Kleinberg and Sendhil
Mullainathan",
title = "Assessing Human Error Against a Benchmark of
Perfection",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "45:1--45:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3046947",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "An increasing number of domains are providing us with
detailed trace data on human decisions in settings
where we can evaluate the quality of these decisions
via an algorithm. Motivated by this development, an
emerging line of work has begun to consider whether we
can characterize and predict the kinds of decisions
where people are likely to make errors. To investigate
what a general framework for human error prediction
might look like, we focus on a model system with a rich
history in the behavioral sciences: the decisions made
by chess players as they select moves in a game. We
carry out our analysis at a large scale, employing
datasets with several million recorded games, and using
chess tablebases to acquire a form of ground truth for
a subset of chess positions that have been completely
solved by computers but remain challenging for even the
best players in the world. We organize our analysis
around three categories of features that we argue are
present in most settings where the analysis of human
error is applicable: the skill of the decision-maker,
the time available to make the decision, and the
inherent difficulty of the decision. We identify rich
structure in all three of these categories of features,
and find strong evidence that in our domain, features
describing the inherent difficulty of an instance are
significantly more powerful than features based on
skill or time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "45",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2017:DCM,
author = "Yihan Wang and Shaoxu Song and Lei Chen and Jeffrey Xu
Yu and Hong Cheng",
title = "Discovering Conditional Matching Rules",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "46:1--46:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070647",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Matching dependencies (MDs) have recently been
proposed to make data dependencies tolerant to various
information representations, and found useful in data
quality applications such as record matching. Instead
of the strict equality function used in traditional
dependency syntax (e.g., functional dependencies), MDs
specify constraints based on similarity and
identification. However, in practice, MDs may still be
too strict and applicable only in a subset of tuples in
a relation. Thereby, we study the conditional matching
dependencies (CMDs), which bind matching dependencies
only in a certain part of a table, i.e., MDs
conditionally applicable in a subset of tuples.
Compared to MDs, CMDs have more expressive power that
enables them to satisfy wider application needs. In
this article, we study several important theoretical
and practical issues of CMDs, including irreducible
CMDs with respect to the implication, discovery of CMDs
from data, reliable CMDs agreed most by a relation,
approximate CMDs almost satisfied in a relation, and
finally applications of CMDs in record matching and
missing value repairing. Through an extensive
experimental evaluation in real data sets, we
demonstrate the efficiency of proposed CMDs discovery
algorithms and effectiveness of CMDs in real
applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "46",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Anagnostopoulos:2017:QDL,
author = "Christos Anagnostopoulos and Peter Triantafillou",
title = "Query-Driven Learning for Predictive Analytics of Data
Subspace Cardinality",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "47:1--47:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3059177",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Fundamental to many predictive analytics tasks is the
ability to estimate the cardinality (number of data
items) of multi-dimensional data subspaces, defined by
query selections over datasets. This is crucial for
data analysts dealing with, e.g., interactive data
subspace explorations, data subspace visualizations,
and in query processing optimization. However, in many
modern data systems, predictive analytics may be (i)
too costly money-wise, e.g., in clouds, (ii)
unreliable, e.g., in modern Big Data query engines,
where accurate statistics are difficult to
obtain/maintain, or (iii) infeasible, e.g., for privacy
issues. We contribute a novel, query-driven, function
estimation model of analyst-defined data subspace
cardinality. The proposed estimation model is highly
accurate in terms of prediction and accommodating the
well-known selection queries: multi-dimensional range
and distance-nearest neighbors (radius) queries. Our
function estimation model: (i) quantizes the vectorial
query space, by learning the analysts' access patterns
over a data space, (ii) associates query vectors with
their corresponding cardinalities of the
analyst-defined data subspaces, (iii) abstracts and
employs query vectorial similarity to predict the
cardinality of an unseen/unexplored data subspace, and
(iv) identifies and adapts to possible changes of the
query subspaces based on the theory of optimal
stopping. The proposed model is decentralized,
facilitating the scaling-out of such predictive
analytics queries. The research significance of the
model lies in that (i) it is an attractive solution
when data-driven statistical techniques are undesirable
or infeasible, (ii) it offers a scale-out,
decentralized training solution, (iii) it is applicable
to different selection query types, and (iv) it offers
a performance that is superior to that of data-driven
approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "47",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2017:LSO,
author = "Yue Wu and Steven C. H. Hoi and Tao Mei and Nenghai
Yu",
title = "Large-Scale Online Feature Selection for Ultra-High
Dimensional Sparse Data",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "48:1--48:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070646",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Feature selection (FS) is an important technique in
machine learning and data mining, especially for
large-scale high-dimensional data. Most existing
studies have been restricted to batch learning, which
is often inefficient and poorly scalable when handling
big data in real world. As real data may arrive
sequentially and continuously, batch learning has to
retrain the model for the new coming data, which is
very computationally intensive. Online feature
selection (OFS) is a promising new paradigm that is
more efficient and scalable than batch learning
algorithms. However, existing online algorithms usually
fall short in their inferior efficacy. In this article,
we present a novel second-order OFS algorithm that is
simple yet effective, very fast and extremely scalable
to deal with large-scale ultra-high dimensional sparse
data streams. The basic idea is to exploit the
second-order information to choose the subset of
important features with high confidence weights. Unlike
existing OFS methods that often suffer from extra high
computational cost, we devise a novel algorithm with a
MaxHeap-based approach, which is not only more
effective than the existing first-order algorithms, but
also significantly more efficient and scalable. Our
extensive experiments validated that the proposed
technique achieves highly competitive accuracy as
compared with state-of-the-art batch FS methods,
meanwhile it consumes significantly less computational
cost that is orders of magnitude lower. Impressively,
on a billion-scale synthetic dataset (1-billion
dimensions, 1-billion non-zero features, and 1-million
samples), the proposed algorithm takes less than 3
minutes to run on a single PC.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "48",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Costa:2017:MTA,
author = "Alceu Ferraz Costa and Yuto Yamaguchi and Agma Juci
Machado Traina and Caetano {Traina Jr.} and Christos
Faloutsos",
title = "Modeling Temporal Activity to Detect Anomalous
Behavior in Social Media",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "49:1--49:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3064884",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Social media has become a popular and important tool
for human communication. However, due to this
popularity, spam and the distribution of malicious
content by computer-controlled users, known as bots,
has become a widespread problem. At the same time, when
users use social media, they generate valuable data
that can be used to understand the patterns of human
communication. In this article, we focus on the
following important question: Can we identify and use
patterns of human communication to decide whether a
human or a bot controls a user? The first contribution
of this article is showing that the distribution of
inter-arrival times (IATs) between postings is
characterized by following four patterns: (i)
heavy-tails, (ii) periodic-spikes, (iii) correlation
between consecutive values, and (iv) bimodality. As
our second contribution, we propose a mathematical
model named Act-M (Activity Model). We show that Act-M
can accurately fit the distribution of IATs from social
media users. Finally, we use Act-M to develop a method
that detects if users are bots based only on the timing
of their postings. We validate Act-M using data from
over 55 million postings from four social media
services: Reddit, Twitter, Stack-Overflow, and
Hacker-News. Our experiments show that Act-M provides a
more accurate fit to the data than existing models for
human dynamics. Additionally, when detecting bots,
Act-M provided a precision higher than 93\% and 77\%
with a sensitivity of 70\% for the Twitter and Reddit
datasets, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "49",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Vosoughi:2017:RGP,
author = "Soroush Vosoughi and Mostafa `Neo' Mohsenvand and Deb
Roy",
title = "{Rumor Gauge}: Predicting the Veracity of Rumors on
{Twitter}",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "50:1--50:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070644",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The spread of malicious or accidental misinformation
in social media, especially in time-sensitive
situations, such as real-world emergencies, can have
harmful effects on individuals and society. In this
work, we developed models for automated verification of
rumors (unverified information) that propagate through
Twitter. To predict the veracity of rumors, we
identified salient features of rumors by examining
three aspects of information spread: linguistic style
used to express rumors, characteristics of people
involved in propagating information, and network
propagation dynamics. The predicted veracity of a time
series of these features extracted from a rumor (a
collection of tweets) is generated using Hidden Markov
Models. The verification algorithm was trained and
tested on 209 rumors representing 938,806 tweets
collected from real-world events, including the 2013
Boston Marathon bombings, the 2014 Ferguson unrest, and
the 2014 Ebola epidemic, and many other rumors about
various real-world events reported on popular websites
that document public rumors. The algorithm was able to
correctly predict the veracity of 75\% of the rumors
faster than any other public source, including
journalists and law enforcement officials. The ability
to track rumors and predict their outcomes may have
practical applications for news consumers, financial
markets, journalists, and emergency services, and more
generally to help minimize the impact of false
information on Twitter.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "50",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Boutemine:2017:MCS,
author = "Oualid Boutemine and Mohamed Bouguessa",
title = "Mining Community Structures in Multidimensional
Networks",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "51:1--51:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3080574",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We investigate the problem of community detection in
multidimensional networks, that is, networks where
entities engage in various interaction types
(dimensions) simultaneously. While some approaches have
been proposed to identify community structures in
multidimensional networks, there are a number of
problems still to solve. In fact, the majority of the
proposed approaches suffer from one or even more of the
following limitations: (1) difficulty detecting
communities in networks characterized by the presence
of many irrelevant dimensions, (2) lack of systematic
procedures to explicitly identify the relevant
dimensions of each community, and (3) dependence on a
set of user-supplied parameters, including the number
of communities, that require a proper tuning. Most of
the existing approaches are inadequate for dealing with
these three issues in a unified framework. In this
paper, we develop a novel approach that is capable of
addressing the aforementioned limitations in a single
framework. The proposed approach allows automated
identification of communities and their sub-dimensional
spaces using a novel objective function and a
constrained label propagation-based optimization
strategy. By leveraging the relevance of dimensions at
the node level, the strategy aims to maximize the
number of relevant within-community links while keeping
track of the most relevant dimensions. A notable
feature of the proposed approach is that it is able to
automatically identify low dimensional community
structures embedded in a high dimensional space.
Experiments on synthetic and real multidimensional
networks illustrate the suitability of the new
method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "51",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Algizawy:2017:RTL,
author = "Essam Algizawy and Tetsuji Ogawa and Ahmed El-Mahdy",
title = "Real-Time Large-Scale Map Matching Using Mobile Phone
Data",
journal = j-TKDD,
volume = "11",
number = "4",
pages = "52:1--52:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3046945",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 22 09:23:44 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "With the widespread use of mobile phones, cellular
mobile big data is becoming an important resource that
provides a wealth of information with almost no cost.
However, the data generally suffers from relatively
high spatial granularity, limiting the scope of its
application. In this article, we consider, for the
first time, the utility of actual mobile big data for
map matching allowing for ``microscopic'' level traffic
analysis. The state-of-the-art in map matching
generally targets GPS data, which provides far denser
sampling and higher location resolution than the mobile
data. Our approach extends the typical Hidden-Markov
model used in map matching to accommodate for highly
sparse location trajectories, exploit the large mobile
data volume to learn the model parameters, and exploit
the sparsity of the data to provide for real-time
Viterbi processing. We study an actual, anonymised
mobile trajectories data set of the city of Dakar,
Senegal, spanning a year, and generate a corresponding
road-level traffic density, at an hourly granularity,
for each mobile trajectory. We observed a relatively
high correlation between the generated traffic
intensities and corresponding values obtained by the
gravity and equilibrium models typically used in
mobility analysis, indicating the utility of the
approach as an alternative means for traffic
analysis.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "52",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{vanLeeuwen:2018:ETS,
author = "Matthijs van Leeuwen and Polo Chau and Jilles Vreeken
and Dafna Shahaf and Christos Faloutsos",
title = "Editorial: {TKDD} Special Issue on Interactive Data
Exploration and Analytics",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3181707",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Rayar:2018:VIS,
author = "Fr{\'e}d{\'e}ric Rayar and Sabine Barrat and Fatma
Bouali and Gilles Venturini",
title = "A Viewable Indexing Structure for the Interactive
Exploration of Dynamic and Large Image Collections",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047011",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Thanks to the capturing devices cost reduction and the
advent of social networks, the size of image
collections is becoming extremely huge. Many works in
the literature have addressed the indexing of large
image collections for search purposes. However, there
is a lack of support for exploratory data mining. One
may want to wander around the images and experience
serendipity in the exploration process. Thus, effective
paradigms not only for organising, but also visualising
these image collections become necessary. In this
article, we present a study to jointly index and
visualise large image collections. The work focuses on
satisfying three constraints. First, large image
collections, up to millions of images, shall be handled.
Second, dynamic collections, such as ever-growing
collections, shall be processed in an incremental way,
without reprocessing the whole collection at each
modification. Finally, an intuitive and interactive
exploration system shall be provided to the user to
allow him to easily mine image collections. To this
end, a data partitioning algorithm has been modified
and proximity graphs have been used to fit the
visualisation purpose. A custom web platform has been
implemented to visualise the hierarchical and
graph-based hybrid structure. The results of a user
evaluation we have conducted show that the exploration
of the collections is intuitive and smooth thanks to
the proposed structure. Furthermore, the scalability of
the proposed indexing method is proved using large
public image collections.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Makki:2018:AVV,
author = "Raheleh Makki and Eder Carvalho and Axel J. Soto and
Stephen Brooks and Maria Cristina {Ferreira De
Oliveira} and Evangelos Milios and Rosane Minghim",
title = "{ATR-Vis}: Visual and Interactive Information
Retrieval for Parliamentary Discussions in {Twitter}",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047010",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The worldwide adoption of Twitter turned it into one
of the most popular platforms for content analysis as
it serves as a gauge of the public's feeling and
opinion on a variety of topics. This is particularly
true of political discussions and lawmakers' actions
and initiatives. Yet, one common but unrealistic
assumption is that the data of interest for analysis is
readily available in a comprehensive and accurate form.
Data need to be retrieved, but due to the brevity and
noisy nature of Twitter content, it is difficult to
formulate user queries that match relevant posts that
use different terminology without introducing a
considerable volume of unwanted content. This problem
is aggravated when the analysis must contemplate
multiple and related topics of interest, for which
comments are being concurrently posted. This article
presents Active Tweet Retrieval Visualization
(ATR-Vis), a user-driven visual approach for the
retrieval of Twitter content applicable to this
scenario. The method proposes a set of active retrieval
strategies to involve an analyst in such a way that a
major improvement in retrieval coverage and precision
is attained with minimal user effort. ATR-Vis enables
non-technical users to benefit from the aforementioned
active learning strategies by providing visual aids to
facilitate the requested supervision. This supports the
exploration of the space of potentially relevant
tweets, and affords a better understanding of the
retrieval results. We evaluate our approach in
scenarios in which the task is to retrieve tweets
related to multiple parliamentary debates within a
specific time span. We collected two Twitter datasets,
one associated with debates in the Canadian House of
Commons during a particular week in May 2014, and
another associated with debates in the Brazilian
Federal Senate during a selected week in May 2015. The
two use cases illustrate the effectiveness of ATR-Vis
for the retrieval of relevant tweets, while
quantitative results show that our approach achieves
high retrieval quality with a modest amount of
supervision. Finally, we evaluated our tool with three
external users who perform searching in social media as
part of their professional work.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lim:2018:MEA,
author = "Yongsub Lim and Minsoo Jung and U. Kang",
title = "Memory-Efficient and Accurate Sampling for Counting
Local Triangles in Graph Streams: From Simple to
Multigraphs",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "4:1--4:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3022186",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "How can we estimate local triangle counts accurately
in a graph stream without storing the whole graph? How
to handle duplicated edges in local triangle counting
for graph stream? Local triangle counting, which
computes the number of triangles attached to each node
in a graph, is a very important problem with wide
applications in social network analysis, anomaly
detection, web mining, and the like. In this article,
we propose algorithms for local triangle counting in a
graph stream based on edge sampling: Mascot for a
simple graph, and MultiBMascot and MultiWMascot for a
multigraph. To develop Mascot, we first present two
naive local triangle counting algorithms in a graph
stream, called Mascot-C and Mascot-A. Mascot-C is based
on constant edge sampling, and Mascot-A improves its
accuracy by utilizing more memory spaces. Mascot
achieves both accuracy and memory-efficiency of the two
algorithms by unconditional triangle counting for a new
edge, regardless of whether it is sampled or not.
Extending the idea to a multigraph, we develop two
algorithms MultiBMascot and MultiWMascot. MultiBMascot
enables local triangle counting on the corresponding
simple graph of a streamed multigraph without explicit
graph conversion; MultiWMascot considers repeated
occurrences of an edge as its weight and counts each
triangle as the product of its three edge weights. In
contrast to the existing algorithm that requires prior
knowledge on the target graph and appropriately set
parameters, our proposed algorithms require only one
parameter of edge sampling probability. Through
extensive experiments, we show that for the same number
of edges sampled, Mascot provides the best accuracy
compared to the existing algorithm as well as Mascot-C
and Mascot-A. We also demonstrate that MultiBMascot on
a multigraph is comparable to Mascot-C on the
counterpart simple graph, and MultiWMascot becomes more
accurate for higher degree nodes. Thanks to Mascot, we
also discover interesting anomalous patterns in real
graphs, including core-peripheries in the web, a
bimodal call pattern in a phone call history, and
intensive collaboration in DBLP.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shi:2018:VAB,
author = "Lei Shi and Hanghang Tong and Madelaine Daianu and
Feng Tian and Paul M. Thompson",
title = "Visual Analysis of Brain Networks Using Sparse
Regression Models",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "5:1--5:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3023363",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Studies of the human brain network are becoming
increasingly popular in the fields of neuroscience,
computer science, and neurology. Despite this rapidly
growing line of research, gaps remain on the
intersection of data analytics, interactive visual
representation, and the human intelligence---all needed
to advance our understanding of human brain networks.
This article tackles this challenge by exploring the
design space of visual analytics. We propose an
integrated framework to orchestrate computational
models with comprehensive data visualizations on the
human brain network. The framework targets two
fundamental tasks: the visual exploration of
multi-label brain networks and the visual comparison
among brain networks across different subject groups.
During the first task, we propose a novel interactive
user interface to visualize sets of labeled brain
networks; in our second task, we introduce sparse
regression models to select discriminative features
from the brain network to facilitate the comparison.
Through user studies and quantitative experiments, both
methods are shown to greatly improve the visual
comparison performance. Finally, real-world case
studies with domain experts demonstrate the utility and
effectiveness of our framework to analyze
reconstructions of human brain connectivity maps. The
perceptually optimized visualization design and the
feature selection model calibration are shown to be the
key to our significant findings.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Galbrun:2018:MRS,
author = "Esther Galbrun and Pauli Miettinen",
title = "Mining Redescriptions with {Siren}",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "6:1--6:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3007212",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In many areas of science, scientists need to find
distinct common characterizations of the same objects
and, vice versa, to identify sets of objects that admit
multiple shared descriptions. For example, in biology,
an important task is to identify the bioclimatic
constraints that allow some species to survive, that
is, to describe geographical regions both in terms of
the fauna that inhabits them and of their bioclimatic
conditions. In data analysis, the task of automatically
generating such alternative characterizations is called
redescription mining. If a domain expert wants to use
redescription mining in his research, merely being able
to find redescriptions is not enough. He must also be
able to understand the redescriptions found, adjust
them to better match his domain knowledge, test
alternative hypotheses with them, and guide the mining
process toward results he considers interesting. To
facilitate these goals, we introduce Siren, an
interactive tool for mining and visualizing
redescriptions. Siren allows to obtain redescriptions
in an anytime fashion through efficient, distributed
mining, to examine the results in various linked
visualizations, to interact with the results either
directly or via the visualizations, and to guide the
mining algorithm toward specific redescriptions. In
this article, we explain the features of Siren and why
they are useful for redescription mining. We also
propose two novel redescription mining algorithms that
improve the generalizability of the results compared to
the existing ones.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2018:IDC,
author = "Hao Wu and Maoyuan Sun and Peng Mi and Nikolaj Tatti
and Chris North and Naren Ramakrishnan",
title = "Interactive Discovery of Coordinated Relationship
Chains with Maximum Entropy Models",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "7:1--7:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047017",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Modern visual analytic tools promote human-in-the-loop
analysis but are limited in their ability to direct the
user toward interesting and promising directions of
study. This problem is especially acute when the
analysis task is exploratory in nature, e.g., the
discovery of potentially coordinated relationships in
massive text datasets. Such tasks are very common in
domains like intelligence analysis and security
forensics where the goal is to uncover surprising
coalitions bridging multiple types of relations. We
introduce new maximum entropy models to discover
surprising chains of relationships leveraging count
data about entity occurrences in documents. These
models are embedded in a visual analytic system called
MERCER (Maximum Entropy Relational Chain ExploRer) that
treats relationship bundles as first class objects and
directs the user toward promising lines of inquiry. We
demonstrate how user input can judiciously direct
analysis toward valid conclusions, whereas a purely
algorithmic approach could be led astray. Experimental
results on both synthetic and real datasets from the
intelligence community are presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Choo:2018:VVA,
author = "Jaegul Choo and Hannah Kim and Edward Clarkson and
Zhicheng Liu and Changhyun Lee and Fuxin Li and
Hanseung Lee and Ramakrishnan Kannan and Charles D.
Stolper and John Stasko and Haesun Park",
title = "{VisIRR}: a Visual Analytics System for Information
Retrieval and Recommendation for Large-Scale Document
Data",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "8:1--8:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070616",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In this article, we present an interactive visual
information retrieval and recommendation system, called
VisIRR, for large-scale document discovery. VisIRR
effectively combines the paradigms of (1) a passive
pull through query processes for retrieval and (2) an
active push that recommends items of potential interest
to users based on their preferences. Equipped with an
efficient dynamic query interface against a large-scale
corpus, VisIRR organizes the retrieved documents into
high-level topics and visualizes them in a 2D space,
representing the relationships among the topics along
with their keyword summary. In addition, based on
interactive personalized preference feedback with
regard to documents, VisIRR provides document
recommendations from the entire corpus, which are
beyond the retrieved sets. Such recommended documents
are visualized in the same space as the retrieved
documents, so that users can seamlessly analyze both
existing and newly recommended ones. This article
presents novel computational methods, which make these
integrated representations and fast interactions
possible for a large-scale document corpus. We
illustrate how the system works by providing detailed
usage scenarios. Additionally, we present preliminary
user study results for evaluating the effectiveness of
the system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Kamat:2018:SBA,
author = "Niranjan Kamat and Arnab Nandi",
title = "A Session-Based Approach to Fast-But-Approximate
Interactive Data Cube Exploration",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "9:1--9:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070648",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "With the proliferation of large datasets, sampling has
become pervasive in data analysis. Sampling has
numerous benefits---from reducing the computation time
and cost to increasing the scope of interactive
analysis. A popular task in data science, well-suited
toward sampling, is the computation of
fast-but-approximate aggregations over sampled data.
Aggregation is a foundational block of data analysis,
with data cube being its primary construct. We observe
that such aggregation queries are typically issued in
an ad-hoc, interactive setting. In contrast to one-off
queries, a typical query session consists of a series
of quick queries, interspersed with the user inspecting
the results and formulating the next query. The
similarity between session queries opens up
opportunities for reusing computation of not just query
results, but also error estimates. Error estimates need
to be provided alongside sampled results for the
results to be meaningful. We propose Sesame, a rewrite
and caching framework that accelerates the entire
interactive session of aggregation queries over sampled
data. We focus on two unique and computationally
expensive aspects of this use case: query speculation
in the presence of sampling, and error computation, and
provide novel strategies for result and error reuse. We
demonstrate that our approach outperforms conventional
sampled aggregation techniques by at least an order of
magnitude, without modifying the underlying database.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Senin:2018:GID,
author = "Pavel Senin and Jessica Lin and Xing Wang and Tim
Oates and Sunil Gandhi and Arnold P. Boedihardjo and
Crystal Chen and Susan Frankenstein",
title = "{GrammarViz} 3.0: Interactive Discovery of
Variable-Length Time Series Patterns",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "10:1--10:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3051126",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The problems of recurrent and anomalous pattern
discovery in time series, e.g., motifs and discords,
respectively, have received a lot of attention from
researchers in the past decade. However, since the
pattern search space is usually intractable, most
existing detection algorithms require that the patterns
have discriminative characteristics and have its length
known in advance and provided as input, which is an
unreasonable requirement for many real-world problems.
In addition, patterns of similar structure, but of
different lengths may co-exist in a time series.
Addressing these issues, we have developed algorithms
for variable-length time series pattern discovery that
are based on symbolic discretization and grammar
inference---two techniques whose combination enables the
structured reduction of the search space and discovery
of the candidate patterns in linear time. In this work,
we present GrammarViz 3.0---a software package that
provides implementations of proposed algorithms and
graphical user interface for interactive
variable-length time series pattern discovery. The
current version of the software provides an alternative
grammar inference algorithm that improves the time
series motif discovery workflow, and introduces an
experimental procedure for automated discretization
parameter selection that builds upon the minimum
cardinality maximum cover principle and aids the time
series recurrent and anomalous pattern discovery.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Datta:2018:CVC,
author = "Srayan Datta and Eytan Adar",
title = "{CommunityDiff}: Visualizing Community Clustering
Algorithms",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "11:1--11:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047009",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Community detection is an oft-used analytical function
of network analysis but can be a black art to apply in
practice. Grouping of related nodes is important for
identifying patterns in network datasets but also
notoriously sensitive to input data and algorithm
selection. This is further complicated by the fact
that, depending on domain and use case, the ground
truth knowledge of the end-user can vary from none to
complete. In this work, we present CommunityDiff, an
interactive visualization system that combines
visualization and active learning (AL) to support the
end-user's analytical process. As the end-user
interacts with the system, a continuous refinement
process updates both the community labels and
visualizations. CommunityDiff features a mechanism for
visualizing ensemble spaces, weighted combinations of
algorithm output, that can identify patterns,
commonalities, and differences among multiple community
detection algorithms. Among other features,
CommunityDiff introduces an AL mechanism that visually
indicates uncertainty about community labels to focus
end-user attention and supporting end-user control that
ranges from explicitly indicating the number of
expected communities to merging and splitting
communities. Based on this end-user input,
CommunityDiff dynamically recalculates communities. We
demonstrate the viability of our through a study of
speed of end-user convergence on satisfactory community
labels. As part of building CommunityDiff, we describe
a design process that can be adapted to other
Interactive Machine Learning applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2018:LIC,
author = "Yang Yang and Jie Tang and Juanzi Li",
title = "Learning to Infer Competitive Relationships in
Heterogeneous Networks",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "12:1--12:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3051127",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Detecting and monitoring competitors is fundamental to
a company to stay ahead in the global market. Existing
studies mainly focus on mining competitive
relationships within a single data source, while
competing information is usually distributed in
multiple networks. How to discover the underlying
patterns and utilize the heterogeneous knowledge to
avoid biased aspects in this issue is a challenging
problem. In this article, we study the problem of
mining competitive relationships by learning across
heterogeneous networks. We use Twitter and patent
records as our data sources and statistically study the
patterns behind the competitive relationships. We find
that the two networks exhibit different but
complementary patterns of competitions. Overall, we
find that similar entities tend to be competitors, with
a probability of 4 times higher than chance. On the
other hand, in social network, we also find a 10
minutes phenomenon: when two entities are mentioned by
the same user within 10 minutes, the likelihood of them
being competitors is 25 times higher than chance. Based
on the discovered patterns, we propose a novel Topical
Factor Graph Model. Generally, our model defines a
latent topic layer to bridge the Twitter network and
patent network. It then employs a semi-supervised
learning algorithm to classify the relationships
between entities (e.g., companies or products). We test
the proposed model on two real data sets and the
experimental results validate the effectiveness of our
model, with an average of +46\% improvement over
alternative methods. Besides, we further demonstrate
the competitive relationships inferred by our proposed
model can be applied in the job-hopping prediction
problem by achieving an average of +10.7\%
improvement.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2018:PSM,
author = "Boyue Wang and Yongli Hu and Junbin Gao and Yanfeng
Sun and Baocai Yin",
title = "Partial Sum Minimization of Singular Values
Representation on {Grassmann} Manifolds",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "13:1--13:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092690",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Clustering is one of the fundamental topics in data
mining and pattern recognition. As a prospective
clustering method, the subspace clustering has made
considerable progress in recent researches, e.g.,
sparse subspace clustering (SSC) and low rank
representation (LRR). However, most existing subspace
clustering algorithms are designed for vectorial data
from linear spaces, thus not suitable for
high-dimensional data with intrinsic non-linear
manifold structure. For high-dimensional or manifold
data, few research pays attention to clustering
problems. The purpose of clustering on manifolds tends
to cluster manifold-valued data into several groups
according to the manifold-based similarity metric. This
article proposes an extended LRR model for
manifold-valued Grassmann data that incorporates prior
knowledge by minimizing partial sum of singular values
instead of the nuclear norm, namely Partial Sum
minimization of Singular Values Representation
(GPSSVR). The new model not only enforces the global
structure of data in low rank, but also retains
important information by minimizing only smaller
singular values. To further maintain the local
structures among Grassmann points, we also integrate
the Laplacian penalty with GPSSVR. The proposed model
and algorithms are assessed on a public human face
dataset, some widely used human action video datasets
and a real scenery dataset. The experimental results
show that the proposed methods obviously outperform
other state-of-the-art methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Trevino:2018:DSE,
author = "Edgar S. Garc{\'\i}a Trevi{\~n}o and Muhammad Zaid
Hameed and Javier A. Barria",
title = "Data Stream Evolution Diagnosis Using Recursive
Wavelet Density Estimators",
journal = j-TKDD,
volume = "12",
number = "1",
pages = "14:1--14:??",
month = feb,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3106369",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Data streams are a new class of data that is becoming
pervasively important in a wide range of applications,
ranging from sensor networks, environmental monitoring
to finance. In this article, we propose a novel
framework for the online diagnosis of evolution of
multidimensional streaming data that incorporates
Recursive Wavelet Density Estimators into the context
of Velocity Density Estimation. In the proposed
framework changes in streaming data are characterized
by the use of local and global evolution coefficients.
In addition, we propose for the analysis of changes in
the correlation structure of the data a recursive
implementation of the Pearson correlation coefficient
using exponential discounting. Two visualization tools,
namely temporal and spatial velocity profiles, are
extended in the context of the proposed framework.
These are the three main advantages of the proposed
method over previous approaches: (1) the memory storage
required is minimal and independent of any window size;
(2) it has a significantly lower computational
complexity; and (3) it makes possible the fast
diagnosis of data evolution at all dimensions and at
relevant combinations of dimensions with only one pass
of the data. With the help of the four examples, we
show the framework's relevance in a change detection
context and its potential capability for real world
applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Kaushal:2018:ETP,
author = "Vishal Kaushal and Manasi Patwardhan",
title = "Emerging Trends in Personality Identification Using
Online Social Networks --- a Literature Survey",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "15:1--15:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070645",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Personality is a combination of all the
attributes --- behavioral, temperamental, emotional, and
mental --- that characterizes a unique individual. Ability
to identify personalities of people has always been of
great interest to the researchers due to its
importance. It continues to find highly useful
applications in many domains. Owing to the increasing
popularity of online social networks, researchers have
started looking into the possibility of predicting a
user's personality from his online social networking
profile, which serves as a rich source of textual as
well as non-textual content published by users. In the
process of creating social networking profiles, users
reveal a lot about themselves both in what they share
and how they say it. Studies suggest that the online
social networking websites are, in fact, a relevant and
valid means of communicating personality. In this
article, we review these various studies reported in
literature toward identification of personality using
online social networks. To the best of our knowledge,
this is the first reported survey of its kind at the
time of submission. We hope that our contribution,
especially in summarizing the previous findings and in
identifying the directions for future research in this
area, would encourage researchers to do more work in
this budding area.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Pandove:2018:SRC,
author = "Divya Pandove and Shivani Goel and Rinkle Rani",
title = "Systematic Review of Clustering High-Dimensional and
Large Datasets",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "16:1--16:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3132088",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Technological advancement has enabled us to store and
process huge amount of data in relatively short spans
of time. The nature of data is rapidly changing,
particularly its dimensionality is more commonly multi-
and high-dimensional. There is an immediate need to
expand our focus to include analysis of
high-dimensional and large datasets. Data analysis is
becoming a mammoth task, due to incremental increase in
data volume and complexity in terms of heterogony of
data. It is due to this dynamic computing environment
that the existing techniques either need to be modified
or discarded to handle new data in multiple
high-dimensions. Data clustering is a tool that is used
in many disciplines, including data mining, so that
meaningful knowledge can be extracted from seemingly
unstructured data. The aim of this article is to
understand the problem of clustering and various
approaches addressing this problem. This article
discusses the process of clustering from both
microviews (data treating) and macroviews (overall
clustering process). Different distance and similarity
measures, which form the cornerstone of effective data
clustering, are also identified. Further, an in-depth
analysis of different clustering approaches focused on
data mining, dealing with large-scale datasets is
given. These approaches are comprehensively compared to
bring out a clear differentiation among them. This
article also surveys the problem of high-dimensional
data and the existing approaches, that makes it more
relevant. It also explores the latest trends in cluster
analysis, and the real-life applications of this
concept. This survey is exhaustive as it tries to cover
all the aspects of clustering in the field of data
mining.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2018:LSC,
author = "Yixuan Li and Kun He and Kyle Kloster and David Bindel
and John Hopcroft",
title = "Local Spectral Clustering for Overlapping Community
Detection",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "17:1--17:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3106370",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Large graphs arise in a number of contexts and
understanding their structure and extracting
information from them is an important research area.
Early algorithms for mining communities have focused on
global graph structure, and often run in time
proportional to the size of the entire graph. As we
explore networks with millions of vertices and find
communities of size in the hundreds, it becomes
important to shift our attention from macroscopic
structure to microscopic structure in large networks. A
growing body of work has been adopting local expansion
methods in order to identify communities from a few
exemplary seed members. In this article, we propose a
novel approach for finding overlapping communities
called Lemon (Local Expansion via Minimum One Norm).
Provided with a few known seeds, the algorithm finds
the community by performing a local spectral diffusion.
The core idea of Lemon is to use short random walks to
approximate an invariant subspace near a seed set,
which we refer to as local spectra. Local spectra can
be viewed as the low-dimensional embedding that
captures the nodes' closeness in the local network
structure. We show that Lemon's performance in
detecting communities is competitive with
state-of-the-art methods. Moreover, the running time
scales with the size of the community rather than that
of the entire graph. The algorithm is easy to implement
and is highly parallelizable. We further provide
theoretical analysis of the local spectral properties,
bounding the measure of tightness of extracted
community using the eigenvalues of graph Laplacian. We
thoroughly evaluate our approach using both synthetic
and real-world datasets across different domains, and
analyze the empirical variations when applying our
method to inherently different networks in practice. In
addition, the heuristics on how the seed set quality
and quantity would affect the performance are
provided.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "17",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Costa:2018:MOC,
author = "Gianni Costa and Riccardo Ortale",
title = "Mining Overlapping Communities and Inner Role
Assignments through {Bayesian} Mixed-Membership Models
of Networks with Context-Dependent Interactions",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "18:1--18:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3106368",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Community discovery and role assignment have been
recently integrated into an unsupervised approach for
the exploratory analysis of overlapping communities and
inner roles in networks. However, the formation of ties
in these prototypical research efforts is not truly
realistic, since it does not account for a fundamental
aspect of link establishment in real-world networks,
i.e., the explicative reasons that cause interactions
among nodes. Such reasons can be interpreted as generic
requirements of nodes, that are met by other nodes and
essentially pertain both to the nodes themselves and to
their interaction contexts (i.e., the respective
communities and roles). In this article, we present two
new model-based machine-learning approaches, wherein
community discovery and role assignment are seamlessly
integrated and simultaneously performed through
approximate posterior inference in Bayesian
mixed-membership models of directed networks. The
devised models account for the explicative reasons
governing link establishment in terms of node-specific
and contextual latent interaction factors. The former
are inherently characteristic of nodes, while the
latter are characterizations of nodes in the context of
the individual communities and roles. The generative
process of both models assigns nodes to communities
with respective roles and connects them through
directed links, which are probabilistically governed by
their node-specific and contextual interaction factors.
The difference between the proposed models lies in the
exploitation of the contextual interaction factors.
More precisely, in one model, the contextual
interaction factors have the same impact on link
generation. In the other model, the contextual
interaction factors are weighted by the extent of
involvement of the linked nodes in the respective
communities and roles. We develop MCMC algorithms
implementing approximate posterior inference and
parameter estimation within our models. Finally, we
conduct an intensive comparative experimentation, which
demonstrates their superiority in community compactness
and link prediction on various real-world and synthetic
networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "18",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Long:2018:PMS,
author = "Cheng Long and Raymond Chi-Wing Wong and Victor Junqiu
Wei",
title = "Profit Maximization with Sufficient Customer
Satisfactions",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3110216",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In many commercial campaigns, we observe that there
exists a tradeoff between the number of customers
satisfied by the company and the profit gained. Merely
satisfying as many customers as possible or maximizing
the profit is not desirable. To this end, in this
article, we propose a new problem called
$k$-Satisfiability Assignment for Maximizing the Profit
($k$-SAMP), where $k$ is a user parameter and a
non-negative integer. Given a set $P$ of products and a
set $O$ of customers, $k$-SAMP is to find an assignment
between $P$ and $O$ such that at least $k$ customers
are satisfied in the assignment and the profit incurred
by this assignment is maximized. Although we find that
this problem is closely related to two classic computer
science problems, namely maximum weight matching and
maximum matching, the techniques developed for these
classic problems cannot be adapted to our $k$-SAMP
problem. In this work, we design a novel algorithm
called Adjust for the $k$-SAMP problem. Given an
assignment $A$, Adjust iteratively increases the profit
of $A$ by adjusting some appropriate matches in $A$
while keeping at least $k$ customers satisfied in $A$.
We prove that Adjust returns a global optimum.
Extensive experiments were conducted that verified the
efficiency of Adjust.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "19",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ramezani:2018:CDU,
author = "Maryam Ramezani and Ali Khodadadi and Hamid R.
Rabiee",
title = "Community Detection Using Diffusion Information",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3110215",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Community detection in social networks has become a
popular topic of research during the last decade. There
exist a variety of algorithms for modularizing the
network graph into different communities. However, they
mostly assume that partial or complete information of
the network graphs are available that is not feasible
in many cases. In this article, we focus on detecting
communities by exploiting their diffusion information.
To this end, we utilize the Conditional Random Fields
(CRF) to discover the community structures. The
proposed method, community diffusion (CoDi), does not
require any prior knowledge about the network structure
or specific properties of communities. Furthermore, in
contrast to the structure-based community detection
methods, this method is able to identify the hidden
communities. The experimental results indicate
considerable improvements in detecting communities
based on accuracy, scalability, and real cascade
information measures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "20",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chiasserini:2018:ACS,
author = "Carla-Fabiana Chiasserini and Michele Garetto and Emilio
Leonardi",
title = "De-anonymizing Clustered Social Networks by
Percolation Graph Matching",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3127876",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Online social networks offer the opportunity to
collect a huge amount of valuable information about
billions of users. The analysis of this data by service
providers and unintended third parties is posing
serious threats to user privacy. In particular, recent
work has shown that users participating in more than
one online social network can be identified based only
on the structure of their links to other users. An
effective tool to de-anonymize social network users is
represented by graph matching algorithms. Indeed, by
exploiting a sufficiently large set of seed nodes, a
percolation process can correctly match almost all
nodes across the different social networks. In this
article, we show the crucial role of clustering, which
is a relevant feature of social network graphs (and
many other systems). Clustering has both the effect of
making matching algorithms more prone to errors, and
the potential to greatly reduce the number of seeds
needed to trigger percolation. We show these facts by
considering a fairly general class of random geometric
graphs with variable clustering level. We assume that
seeds can be identified in particular sub-regions of
the network graph, while no a priori knowledge about
the location of the other nodes is required. Under
these conditions, we show how clever algorithms can
achieve surprisingly good performance while limiting
the number of matching errors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "21",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2018:JRL,
author = "Wayne Xin Zhao and Feifan Fan and Ji-Rong Wen and
Edward Y. Chang",
title = "Joint Representation Learning for Location-Based
Social Networks with Multi-Grained Sequential
Contexts",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "22:1--22:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3127875",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "This article studies the problem of learning effective
representations for Location-Based Social Networks
(LBSN), which is useful in many tasks such as location
recommendation and link prediction. Existing network
embedding methods mainly focus on capturing topology
patterns reflected in social connections, while
check-in sequences, the most important data type in
LBSNs, are not directly modeled by these models. In
this article, we propose a representation learning
method for LBSNs called as JRLM++, which models
check-in sequences together with social connections. To
capture sequential relatedness, JRLM++ characterizes
two levels of sequential contexts, namely fine-grained
and coarse-grained contexts. We present a learning
algorithm tailored to the hierarchical architecture of
the proposed model. We conduct extensive experiments on
two important applications using real-world datasets.
The experimental results demonstrate the superiority of
our model. The proposed model can generate effective
representations for both users and locations in the
same embedding space, which can be further utilized to
improve multiple LBSN tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "22",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hu:2018:CFT,
author = "Guang-Neng Hu and Xin-Yu Dai and Feng-Yu Qiu and Rui
Xia and Tao Li and Shu-Jian Huang and Jia-Jun Chen",
title = "Collaborative Filtering with Topic and Social Latent
Factors Incorporating Implicit Feedback",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "23:1--23:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3127873",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Recommender systems (RSs) provide an effective way of
alleviating the information overload problem by
selecting personalized items for different users.
Latent factors-based collaborative filtering (CF) has
become the popular approaches for RSs due to its
accuracy and scalability. Recently, online social
networks and user-generated content provide diverse
sources for recommendation beyond ratings. Although
social matrix factorization (Social MF) and topic
matrix factorization (Topic MF) successfully exploit
social relations and item reviews, respectively; both
of them ignore some useful information. In this
article, we investigate the effective data fusion by
combining the aforementioned approaches. First, we
propose a novel model MR3 to jointly model three
sources of information (i.e., ratings, item reviews,
and social relations) effectively for rating prediction
by aligning the latent factors and hidden topics.
Second, we incorporate the implicit feedback from
ratings into the proposed model to enhance its
capability and to demonstrate its flexibility. We
achieve more accurate rating prediction on real-life
datasets over various state-of-the-art methods.
Furthermore, we measure the contribution from each of
the three data sources and the impact of implicit
feedback from ratings, followed by the sensitivity
analysis of hyperparameters. Empirical studies
demonstrate the effectiveness and efficacy of our
proposed model and its extension.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "23",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Perozzi:2018:DCA,
author = "Bryan Perozzi and Leman Akoglu",
title = "Discovering Communities and Anomalies in Attributed
Graphs: Interactive Visual Exploration and
Summarization",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3139241",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a network with node attributes, how can we
identify communities and spot anomalies? How can we
characterize, describe, or summarize the network in a
succinct way? Community extraction requires a measure
of quality for connected subgraphs (e.g., social
circles). Existing subgraph measures, however, either
consider only the connectedness of nodes inside the
community and ignore the cross-edges at the boundary
(e.g., density) or only quantify the structure of the
community and ignore the node attributes (e.g.,
conductance). In this work, we focus on node-attributed
networks and introduce: (1) a new measure of subgraph
quality for attributed communities called normality,
(2) a community extraction algorithm that uses
normality to extract communities and a few
characterizing attributes per community, and (3) a
summarization and interactive visualization approach
for attributed graph exploration. More specifically,
(1) we first introduce a new measure to quantify the
normality of an attributed subgraph. Our normality
measure carefully utilizes structure and attributes
together to quantify both the internal consistency and
external separability. We then formulate an objective
function to automatically infer a few attributes
(called the ``focus'') and respective attribute
weights, so as to maximize the normality score of a
given subgraph. Most notably, unlike many other
approaches, our measure allows for many cross-edges as
long as they can be ``exonerated;'' i.e., either (i)
are expected under a null graph model, and/or (ii)
their boundary nodes do not exhibit the focus
attributes. Next, (2) we propose AMEN (for Attributed
Mining of Entity Networks), an algorithm that
simultaneously discovers the communities and their
respective focus in a given graph, with a goal to
maximize the total normality. Communities for which a
focus that yields high normality cannot be found are
considered low quality or anomalous. Last, (3) we
formulate a summarization task with a multi-criteria
objective, which selects a subset of the communities
that (i) cover the entire graph well, are (ii) high
quality and (iii) diverse in their focus attributes. We
further design an interactive visualization interface
that presents the communities to a user in an
interpretable, user-friendly fashion. The user can
explore all the communities, analyze various
algorithm-generated summaries, as well as devise their
own summaries interactively to characterize the network
in a succinct way. As the experiments on real-world
attributed graphs show, our proposed approaches
effectively find anomalous communities and outperform
several existing measures and methods, such as
conductance, density, OddBall, and SODA. We also
conduct extensive user studies to measure the
capability and efficiency that our approach provides to
the users toward network summarization, exploration,
and sensemaking.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "24",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bonab:2018:GGO,
author = "Hamed R. Bonab and Fazli Can",
title = "{GOOWE}: Geometrically Optimum and Online-Weighted
Ensemble Classifier for Evolving Data Streams",
journal = j-TKDD,
volume = "12",
number = "2",
pages = "25:1--25:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3139240",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:45 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Designing adaptive classifiers for an evolving data
stream is a challenging task due to the data size and
its dynamically changing nature. Combining individual
classifiers in an online setting, the ensemble
approach, is a well-known solution. It is possible that
a subset of classifiers in the ensemble outperforms
others in a time-varying fashion. However, optimum
weight assignment for component classifiers is a
problem, which is not yet fully addressed in online
evolving environments. We propose a novel data stream
ensemble classifier, called Geometrically Optimum and
Online-Weighted Ensemble (GOOWE), which assigns optimum
weights to the component classifiers using a sliding
window containing the most recent data instances. We
map vote scores of individual classifiers and true
class labels into a spatial environment. Based on the
Euclidean distance between vote scores and
ideal-points, and using the linear least squares (LSQ)
solution, we present a novel, dynamic, and online
weighting approach. While LSQ is used for batch mode
ensemble classifiers, it is the first time that we
adapt and use it for online environments by providing a
spatial modeling of online ensembles. In order to show
the robustness of the proposed algorithm, we use
real-world datasets and synthetic data generators using
the Massive Online Analysis (MOA) libraries. First, we
analyze the impact of our weighting system on
prediction accuracy through two scenarios. Second, we
compare GOOWE with eight state-of-the-art ensemble
classifiers in a comprehensive experimental
environment. Our experiments show that GOOWE provides
improved reactions to different types of concept drift
compared to our baselines. The statistical tests
indicate a significant improvement in accuracy, with
conservative time and memory requirements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "25",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xie:2018:ERP,
author = "Hong Xie and Richard T. B. Ma and John C. S. Lui",
title = "Enhancing Reputation via Price Discounts in E-Commerce
Systems: a Data-Driven Approach",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "26:1--26:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3154417",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Reputation systems have become an indispensable
component of modern E-commerce systems, as they help
buyers make informed decisions in choosing trustworthy
sellers. To attract buyers and increase the transaction
volume, sellers need to earn reasonably high reputation
scores. This process usually takes a substantial amount
of time. To accelerate this process, sellers can
provide price discounts to attract users, but the
underlying difficulty is that sellers have no prior
knowledge on buyers' preferences over price discounts.
In this article, we develop an online algorithm to
infer the optimal discount rate from data. We first
formulate an optimization framework to select the
optimal discount rate given buyers' discount
preferences, which is a tradeoff between the short-term
profit and the ramp-up time (for reputation). We then
derive the closed-form optimal discount rate, which
gives us key insights in applying a stochastic bandits
framework to infer the optimal discount rate from the
transaction data with regret upper bounds. We show that
the computational complexity of evaluating the
performance metrics is infeasibly high, and therefore,
we develop efficient randomized algorithms with
guaranteed performance to approximate them. Finally, we
conduct experiments on a dataset crawled from eBay.
Experimental results show that our framework can trade
60\% of the short-term profit for reducing the ramp-up
time by 40\%. This reduction in the ramp-up time can
increase the long-term profit of a seller by at least
20\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "26",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Belcastro:2018:GRA,
author = "Loris Belcastro and Fabrizio Marozzo and Domenico
Talia and Paolo Trunfio",
title = "{G-RoI}: Automatic Region-of-Interest Detection Driven
by Geotagged Social Media Data",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "27:1--27:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3154411",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Geotagged data gathered from social media can be used
to discover interesting locations visited by users
called Places-of-Interest (PoIs). Since a PoI is
generally identified by the geographical coordinates of
a single point, it is hard to match it with user
trajectories. Therefore, it is useful to define an
area, called Region-of-Interest (RoI), to represent
the boundaries of the PoI's area. RoI mining techniques
are aimed at discovering RoIs from PoIs and other data.
Existing RoI mining techniques are based on three main
approaches: predefined shapes, density-based
clustering, and grid-based aggregation. This article
proposes G-RoI, a novel RoI mining technique that
exploits the indications contained in geotagged social
media items to discover RoIs with a high accuracy.
Experiments performed over a set of PoIs in Rome and
Paris using social media geotagged data, demonstrate
that G-RoI in most cases achieves better results than
existing techniques. In particular, the mean F$_1$
score is 0.34 higher than that obtained with the
well-known DBSCAN algorithm in Rome RoIs and 0.23
higher in Paris RoIs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "27",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shin:2018:FAF,
author = "Kijung Shin and Bryan Hooi and Christos Faloutsos",
title = "Fast, Accurate, and Flexible Algorithms for Dense
Subtensor Mining",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "28:1--28:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3154414",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a large-scale and high-order tensor, how can we
detect dense subtensors in it? Can we spot them in
near-linear time but with quality guarantees? Extensive
previous work has shown that dense subtensors, as well
as dense subgraphs, indicate anomalous or fraudulent
behavior (e.g., lockstep behavior in social networks).
However, available algorithms for detecting dense
subtensors are not satisfactory in terms of speed,
accuracy, and flexibility. In this work, we propose two
algorithms, called M-Zoom and M-Biz, for fast and
accurate dense-subtensor detection with various density
measures. M-Zoom gives a lower bound on the density of
detected subtensors, while M-Biz guarantees the local
optimality of detected subtensors. M-Zoom and M-Biz can
be combined, giving the following advantages: (1)
Scalable: scale near-linearly with all aspects of
tensors and are up to 114$ \times $ faster than
state-of-the-art methods with similar accuracy, (2)
Provably accurate: provide a guarantee on the lowest
density and local optimality of the subtensors they
find, (3) Flexible: support multi-subtensor detection
and size bounds as well as diverse density measures,
and (4) Effective: successfully detected edit wars and
bot activities in Wikipedia, and spotted network
attacks from a TCP dump with near-perfect accuracy (AUC
= 0.98).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "28",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liang:2018:PRA,
author = "Jiongqian Liang and Deepak Ajwani and Patrick K.
Nicholson and Alessandra Sala and Srinivasan
Parthasarathy",
title = "Prioritized Relationship Analysis in Heterogeneous
Information Networks",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "29:1--29:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3154401",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "An increasing number of applications are modeled and
analyzed in network form, where nodes represent
entities of interest and edges represent interactions
or relationships between entities. Commonly, such
relationship analysis tools assume homogeneity in both
node type and edge type. Recent research has sought to
redress the assumption of homogeneity and focused on
mining heterogeneous information networks (HINs) where
both nodes and edges can be of different types.
Building on such efforts, in this work, we articulate a
novel approach for mining relationships across entities
in such networks while accounting for user preference
over relationship type and interestingness metric. We
formalize the problem as a top-$k$ lightest paths
problem, contextualized in a real-world communication
network, and seek to find the $k$ most interesting path
instances matching the preferred relationship type. Our
solution, PROphetic HEuristic Algorithm for Path
Searching (PRO-HEAPS), leverages a combination of novel
graph preprocessing techniques, well-designed
heuristics and the venerable $ A^* $ search algorithm.
We run our algorithm on real-world large-scale graphs
and show that our algorithm significantly outperforms a
wide variety of baseline approaches with speedups as
large as 100X. To widen the range of applications, we
also extend PRO-HEAPS to (i) support relationship
analysis between two groups of entities and (ii) allow
pattern path in the query to contain logical statements
with operators AND, OR, NOT, and wild-card ``.''. We
run experiments using this generalized version of
PRO-HEAPS and demonstrate that the advantage of
PRO-HEAPS becomes even more pronounced for these
general cases. Furthermore, we conduct a comprehensive
analysis to study how the performance of PRO-HEAPS
varies with respect to various attributes of the input
HIN. We finally conduct a case study to demonstrate
valuable applications of our algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "29",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2018:WTC,
author = "Hong Huang and Yuxiao Dong and Jie Tang and Hongxia
Yang and Nitesh V. Chawla and Xiaoming Fu",
title = "Will Triadic Closure Strengthen Ties in Social
Networks?",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "30:1--30:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3154399",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The social triad---a group of three people---is one of the
simplest and most fundamental social groups. Extensive
network and social theories have been developed to
understand its structure, such as triadic closure and
social balance. Over the course of a triadic
closure---the transition from two ties to three among
three users, the strength dynamics of its social ties,
however, are much less well understood. Using two
dynamic networks from social media and mobile
communication, we examine how the formation of the
third tie in a triad affects the strength of the
existing two ties. Surprisingly, we find that in about
80\% social triads, the strength of the first two ties
is weakened although averagely the tie strength in the
two networks maintains an increasing or stable trend.
We discover that (1) the decrease in tie strength among
three males is more sharply than that among females,
and (2) the tie strength between celebrities is more
likely to be weakened as the closure of a triad than
those between ordinary people. Furthermore, we
formalize a triadic tie strength dynamics prediction
problem to infer whether social ties of a triad will
become weakened after its closure. We propose a TRIST
method---a kernel density estimation (KDE)-based
graphical model---to solve the problem by incorporating
user demographics, temporal effects, and structural
information. Extensive experiments demonstrate that
TRIST offers a greater than 82\% potential
predictability for inferring triadic tie strength
dynamics in both networks. The leveraging of the KDE
and structural correlations enables TRIST to outperform
baselines by up to 30\% in terms of F1-score.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "30",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2018:LSB,
author = "Guangyong Chen and Fengyuan Zhu and Pheng Ann Heng",
title = "Large-Scale {Bayesian} Probabilistic Matrix
Factorization with Memo-Free Distributed Variational
Inference",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "31:1--31:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3161886",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Bayesian Probabilistic Matrix Factorization (BPMF) is
a powerful model in many dyadic data prediction
problems, especially the applications of Recommender
system. However, its poor scalability has limited its
wide applications on massive data. Based on the
conditional independence property of observed entries
in BPMF model, we propose a novel distributed memo-free
variational inference method for large-scale matrix
factorization problems. Compared with the
state-of-the-art methods, the proposed method is
favored for several attractive properties.
Specifically, it does not require tuning of learning
rate carefully, shuffling the training set at each
iteration, or storing massive redundant variables, and
can introduce new agents into the computations on the
fly. We conduct extensive experiments on both synthetic
and real-world datasets. The experimental results show
that our method can converge significantly faster with
better prediction performance than alternative
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "31",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2018:MVL,
author = "Sheng Li and Ming Shao and Yun Fu",
title = "Multi-View Low-Rank Analysis with Applications to
Outlier Detection",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "32:1--32:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3168363",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Detecting outliers or anomalies is a fundamental
problem in various machine learning and data mining
applications. Conventional outlier detection algorithms
are mainly designed for single-view data. Nowadays,
data can be easily collected from multiple views, and
many learning tasks such as clustering and
classification have benefited from multi-view data.
However, outlier detection from multi-view data is
still a very challenging problem, as the data in
multiple views usually have more complicated
distributions and exhibit inconsistent behaviors. To
address this problem, we propose a multi-view low-rank
analysis (MLRA) framework for outlier detection in this
article. MLRA pursuits outliers from a new perspective,
robust data representation. It contains two major
components. First, the cross-view low-rank coding is
performed to reveal the intrinsic structures of data.
In particular, we formulate a regularized
rank-minimization problem, which is solved by an
efficient optimization algorithm. Second, the outliers
are identified through an outlier score estimation
procedure. Different from the existing multi-view
outlier detection methods, MLRA is able to detect two
different types of outliers from multiple views
simultaneously. To this end, we design a criterion to
estimate the outlier scores by analyzing the obtained
representation coefficients. Moreover, we extend MLRA
to tackle the multi-view group outlier detection
problem. Extensive evaluations on seven UCI datasets,
the MovieLens, the USPS-MNIST, and the WebKB datasets
demonstrate that our approach outperforms several
state-of-the-art outlier detection methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "32",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Altowim:2018:PAP,
author = "Yasser Altowim and Dmitri V. Kalashnikov and Sharad
Mehrotra",
title = "{ProgressER}: Adaptive Progressive Approach to
Relational Entity Resolution",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "33:1--33:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3154410",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Entity resolution (ER) is the process of identifying
which entities in a dataset refer to the same
real-world object. In relational ER, the dataset
consists of multiple entity-sets and relationships
among them. Such relationships cause the resolution of
some entities to influence the resolution of other
entities. For instance, consider a relational dataset
that consists of a set of research paper entities and a
set of venue entities. In such a dataset, deciding that
two research papers are the same may trigger the fact
that their venues are also the same. This article
proposes a progressive approach to relational ER, named
ProgressER, that aims to produce the highest quality
result given a constraint on the resolution budget,
specified by the user. Such a progressive approach is
useful for many emerging analytical applications that
require low latency response (and thus cannot tolerate
delays caused by cleaning the entire dataset) and/or in
situations where the underlying resources are
constrained or costly to use. To maximize the quality
of the result, ProgressER follows an adaptive strategy
that periodically monitors and reassesses the
resolution progress to determine which parts of the
dataset should be resolved next and how they should be
resolved. More specifically, ProgressER divides the
input budget into several resolution windows and
analyzes the resolution progress at the beginning of
each window to generate a resolution plan for the
current window. A resolution plan specifies which
blocks of entities and which entity pairs within blocks
need to be resolved during the plan execution phase of
that window. In addition, ProgressER specifies, for
each identified pair of entities, the order in which
the similarity functions should be applied on the pair.
Such an order plays a significant role in reducing the
overall cost because applying the first few functions
in this order might be sufficient to resolve the pair.
The empirical evaluation of ProgressER demonstrates its
significant advantage in terms of progressiveness over
the traditional ER techniques for the given problem
settings.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "33",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bakerman:2018:TGH,
author = "Jordan Bakerman and Karl Pazdernik and Alyson Wilson
and Geoffrey Fairchild and Rian Bahran",
title = "{Twitter} Geolocation: a Hybrid Approach",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "34:1--34:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178112",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Geotagging Twitter messages is an important tool for
event detection and enrichment. Despite the
availability of both social media content and user
network information, these two features are generally
utilized separately in the methodology. In this
article, we create a hybrid method that uses Twitter
content and network information jointly as model
features. We use Gaussian mixture models to map the raw
spatial distribution of the model features to a
predicted field. This approach is scalable to large
datasets and provides a natural representation of model
confidence. Our method is tested against other
approaches and we achieve greater prediction accuracy.
The model also improves both precision and coverage.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "34",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Moreno:2018:TKP,
author = "Sebastian Moreno and Jennifer Neville and Sergey
Kirshner",
title = "Tied {Kronecker} Product Graph Models to Capture
Variance in Network Populations",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "35:1--35:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3161885",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Much of the past work on mining and modeling networks
has focused on understanding the observed properties of
single example graphs. However, in many real-life
applications it is important to characterize the
structure of populations of graphs. In this work, we
analyze the distributional properties of probabilistic
generative graph models (PGGMs) for network
populations. PGGMs are statistical methods that model
the network distribution and match common
characteristics of real-world networks. Specifically,
we show that most PGGMs cannot reflect the natural
variability in graph properties observed across
multiple networks because their edge generation process
assumes independence among edges. Then, we propose the
mixed Kronecker Product Graph Model (mKPGM), a scalable
generalization of KPGMs that uses tied parameters to
increase the variability of the sampled networks, while
preserving the edge probabilities in expectation. We
compare mKPGM to several other graph models. The
results show that learned mKPGMs accurately represent
the characteristics of real-world networks, while also
effectively capturing the natural variability in
network structure.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "35",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2018:FFR,
author = "Pei Yang and Qi Tan and Jingrui He",
title = "Function-on-Function Regression with Mode-Sparsity
Regularization",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "36:1--36:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3178113",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Functional data is ubiquitous in many domains, such as
healthcare, social media, manufacturing process, sensor
networks, and so on. The goal of function-on-function
regression is to build a mapping from functional
predictors to functional response. In this article, we
propose a novel function-on-function regression model
based on mode-sparsity regularization. The main idea is
to represent the regression coefficient function
between predictor and response as the double expansion
of basis functions, and then use a mode-sparsity
regularization to automatically filter out irrelevant
basis functions for both predictors and responses. The
proposed approach is further extended to the tensor
version to accommodate multiple functional predictors.
While allowing the dimensionality of the regression
weight matrix or tensor to be relatively large, the
mode-sparsity regularized model facilitates the
multi-way shrinking of basis functions for each mode.
The proposed mode-sparsity regularization covers a wide
spectrum of sparse models for function-on-function
regression. The resulting optimization problem is
challenging due to the non-smooth property of the
mode-sparsity regularization. We develop an efficient
algorithm to solve the problem, which works in an
iterative update fashion, and converges to the global
optimum. Furthermore, we analyze the generalization
performance of the proposed method and derive an upper
bound for the consistency between the recovered
function and the underlying true function. The
effectiveness of the proposed approach is verified on
benchmark functional datasets in various domains.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "36",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Khodadadi:2018:CTU,
author = "Ali Khodadadi and Seyed Abbas Hosseini and Erfan
Tavakoli and Hamid R. Rabiee",
title = "Continuous-Time User Modeling in Presence of Badges: a
Probabilistic Approach",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "37:1--37:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3162050",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "User modeling plays an important role in delivering
customized web services to the users and improving
their engagement. However, most user models in the
literature do not explicitly consider the temporal
behavior of users. More recently, continuous-time user
modeling has gained considerable attention and many
user behavior models have been proposed based on
temporal point processes. However, typical point
process-based models often considered the impact of
peer influence and content on the user participation
and neglected other factors. Gamification elements are
among those factors that are neglected, while they have
a strong impact on user participation in online
services. In this article, we propose interdependent
multi-dimensional temporal point processes that capture
the impact of badges on user participation besides the
peer influence and content factors. We extend the
proposed processes to model user actions over the
community-based question and answering websites, and
propose an inference algorithm based on
Variational-Expectation Maximization that can
efficiently learn the model parameters. Extensive
experiments on both synthetic and real data gathered
from Stack Overflow show that our inference algorithm
learns the parameters efficiently and the proposed
method can better predict the user behavior compared to
the alternatives.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "37",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Peng:2018:MEO,
author = "Min Peng and Jiahui Zhu and Hua Wang and Xuhui Li and
Yanchun Zhang and Xiuzhen Zhang and Gang Tian",
title = "Mining Event-Oriented Topics in Microblog Stream with
Unsupervised Multi-View Hierarchical Embedding",
journal = j-TKDD,
volume = "12",
number = "3",
pages = "38:1--38:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3173044",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "This article presents an unsupervised multi-view
hierarchical embedding (UMHE) framework to sufficiently
reveal the intrinsic topical knowledge in social
events. Event-oriented topics are highly related to
such events as it can provide explicit descriptions of
what have happened in social community. In many
real-world cases, however, it is difficult to include
all attributes of microblogs, more often, textual
aspects only are available. Traditional topic modelling
methods have failed to generate event-oriented topics
with the textual aspects, since the inherent relations
between topics are often overlooked in these methods.
Meanwhile, the metrics in original word vocabulary
space might not effectively capture semantic distances.
Our UMHE framework overcomes the severe information
deficiency and poor feature representation. The UMHE
first develops a multi-view Bayesian rose tree to
preliminarily generate prior knowledge for latent
topics and their relations. With such prior knowledge,
we design an unsupervised translation-based
hierarchical embedding method to make a better
representation of these latent topics. By applying
self-adaptive spectral clustering on the embedding
space and the original space concomitantly, we
eventually extract event-oriented topics in word
distributions to express social events. Our framework
is purely data-driven and unsupervised, without any
external knowledge. Experimental results on TREC
Tweets2011 dataset and Sina Weibo dataset demonstrate
that the UMHE framework can construct hierarchical
structure with high fitness, but also yield topic
embeddings with salient semantics; therefore, it can
derive event-oriented topics with meaningful
descriptions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "38",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Toth:2018:GDT,
author = "Edward Toth and Sanjay Chawla",
title = "{GT$ \Delta $}: Detecting Temporal Changes in Group
Stochastic Processes",
journal = j-TKDD,
volume = "12",
number = "4",
pages = "39:1--39:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3183346",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Given a portfolio of stocks or a series of frames in a
video how do we detect significant changes in a group
of values for real-time applications? In this article,
we formalize the problem of sequentially detecting
temporal changes in a group of stochastic processes. As
a solution to this particular problem, we propose the
group temporal change (GT$ \Delta $) algorithm, a
simple yet effective technique for the sequential
detection of significant changes in a variety of
statistical properties of a group over time. Due to the
flexible framework of the GT$ \Delta $ algorithm, a
domain expert is able to select one or more statistical
properties that they are interested in monitoring. The
usefulness of our proposed algorithm is also
demonstrated against state-of-the-art techniques on
synthetically generated data as well as on two
real-world applications; a portfolio of healthcare
stocks over a 20 year period and a video monitoring the
activity of our Sun.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "39",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
%%% Abstract: article corrected from "an Linear Programming" to
%%% "a Linear Programming".  All other field values are unchanged.
@Article{Xie:2018:SNM,
  author          = "Wei Xie and Feida Zhu and Jing Xiao and Jianzong
                     Wang",
  title           = "Social Network Monitoring for Bursty Cascade
                     Detection",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "40:1--40:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3178048",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Social network services have become important and
                     efficient platforms for users to share all kinds of
                     information. The capability to monitor user-generated
                     information and detect bursts from information
                     diffusions in these social networks brings value to a
                     wide range of real-life applications, such as viral
                     marketing. However, in reality, as a third party, there
                     is always a cost for gathering information from each
                     user or so-called social network sensor. The question
                     then arises how to select a budgeted set of social
                     network sensors to form the data stream for burst
                     detection without compromising the detection
                     performance. In this article, we present a general
                     sensor selection solution for different burst detection
                     approaches. We formulate this problem as a constraint
                     satisfaction problem that has high computational
                     complexity. To reduce the computational cost, we first
                     reduce most of the constraints by making use of the
                     fact that bursty cascades are rare among the whole
                     population. We then transform the problem into a
                     Linear Programming (LP) problem. Furthermore, we use
                     the sub-gradient method instead of the standard simplex
                     method or interior-point method to solve the LP
                     problem, which makes it possible for our solution to
                     scale up to large social networks. Evaluating our
                     solution on millions of real information cascades, we
                     demonstrate both the effectiveness and efficiency of
                     our approach.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "40",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2018:MGC,
  author          = "Xiaowei Chen and John C. S. Lui",
  title           = "Mining Graphlet Counts in Online Social Networks",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "41:1--41:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3182392",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Counting subgraphs is a fundamental analysis task for
                     online social networks (OSNs). Given the sheer size and
                     restricted access of OSN, efficient computation of
                     subgraph counts is highly challenging. Although a
                     number of algorithms have been proposed to estimate the
                     relative counts of subgraphs in OSNs with restricted
                     access, there are only few works which try to solve a
                     more general problem, i.e., counting subgraph
                     frequencies. In this article, we propose an efficient
                     random walk-based framework to estimate the subgraph
                     counts. Our framework generates samples by leveraging
                     consecutive steps of the random walk as well as by
                     observing neighbors of visited nodes. Using the
                     importance sampling technique, we derive unbiased
                     estimators of the subgraph counts. To make better use
                     of the degree information of visited nodes, we also
                     design improved estimators, which increases the
                     accuracy of the estimation with no additional cost. We
                     conduct extensive experimental evaluation on real-world
                     OSNs to confirm our theoretical claims. The experiment
                     results show that our estimators are unbiased,
                     accurate, efficient, and better than the
                     state-of-the-art algorithms. For the Weibo graph with
                     more than 58 million nodes, our method produces
                     estimate of triangle count with an error less than 5\%
                     using only 20,000 sampled nodes. Detailed comparison
                     with the state-of-the-art methods demonstrates that our
                     algorithm is 2--10 times more accurate.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "41",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2018:CGM,
  author          = "Hongfu Liu and Yun Fu",
  title           = "Consensus Guided Multi-View Clustering",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "42:1--42:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3182384",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "In recent decades, tremendous emerging techniques
                     thrive the artificial intelligence field due to the
                     increasing collected data captured from multiple
                     sensors. These multi-view data provide more rich
                     information than traditional single-view data. Fusing
                     heterogeneous information for certain tasks is a core
                     part of multi-view learning, especially for multi-view
                     clustering. Although numerous multi-view clustering
                     algorithms have been proposed, most scholars focus on
                     finding the common space of different views, but
                     unfortunately ignore the benefits from partition level
                     by ensemble clustering. For ensemble clustering,
                     however, there is no interaction between individual
                     partitions from each view and the final consensus one.
                     To fill the gap, we propose a Consensus Guided
                     Multi-View Clustering (CMVC) framework, which
                     incorporates the generation of basic partitions from
                     each view and fusion of consensus clustering in an
                     interactive way, i.e., the consensus clustering guides
                     the generation of basic partitions, and high quality
                     basic partitions positively contribute to the consensus
                     clustering as well. We design a non-trivial
                     optimization solution to formulate CMVC into two
                     iterative $k$-means clusterings by an approximate
                     calculation. In addition, the generalization of CMVC
                     provides a rich feasibility for different scenarios,
                     and the extension of CMVC with incomplete multi-view
                     clustering further validates the effectiveness for
                     real-world applications. Extensive experiments
                     demonstrate the advantages of CMVC over other widely
                     used multi-view clustering methods in terms of cluster
                     validity, and the robustness of CMVC to some important
                     parameters and incomplete multi-view data.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "42",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2018:BGD,
  author          = "Hung-Hsuan Chen",
  title           = "{Behavior2Vec}: Generating Distributed Representations
                     of Users' Behaviors on Products for Recommender
                     Systems",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "43:1--43:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3184454",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Most studies on recommender systems target at
                     increasing the click through rate, and hope that the
                     number of orders will increase as well. We argue that
                     clicking and purchasing an item are different
                     behaviors. Thus, we should probably apply different
                     strategies for different objectives, e.g., increase the
                     click through rate, or increase the order rate. In this
                     article, we propose to generate the distributed
                     representations of users' viewing and purchasing
                     behaviors on an e-commerce website. By leveraging on
                     the cosine distance between the distributed
                     representations of the behaviors on items under
                     different contexts, we can predict a user's next
                     clicking or purchasing item more precisely, compared to
                     several baseline methods. Perhaps more importantly, we
                     found that the distributed representations may help
                     discover interesting analogies among the products. We
                     may utilize such analogies to explain how two products
                     are related, and eventually apply different
                     recommendation strategies under different scenarios. We
                     developed the Behavior2Vec library for demonstration.
                     The library can be accessed at
                     https://github.com/ncu-dart/behavior2vec/.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "43",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Saha:2018:EMO,
  author          = "Sriparna Saha and Sayantan Mitra and Stefan Kramer",
  title           = "Exploring Multiobjective Optimization for Multiview
                     Clustering",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "44:1--44:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3182181",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "We present a new multiview clustering approach based
                     on multiobjective optimization. In contrast to existing
                     clustering algorithms based on multiobjective
                     optimization, it is generally applicable to data
                     represented by two or more views and does not require
                     specifying the number of clusters a priori. The
                     approach builds upon the search capability of a
                     multiobjective simulated annealing based technique,
                     AMOSA, as the underlying optimization technique. In the
                     first version of the proposed approach, an internal
                     cluster validity index is used to assess the quality of
                     different partitionings obtained using different views.
                     A new way of checking the compatibility of these
                     different partitionings is also proposed and this is
                     used as another objective function. A new encoding
                     strategy and some new mutation operators are
                     introduced. Finally, a new way of computing a consensus
                     partitioning from multiple individual partitions
                     obtained on multiple views is proposed. As a baseline
                     and for comparison, two multiobjective based ensemble
                     clustering techniques are proposed to combine the
                     outputs of different simple clustering approaches. The
                     efficacy of the proposed clustering methods is shown
                     for partitioning several real-world datasets having
                     multiple views. To show the practical usefulness of the
                     method, we present results on web-search result
                     clustering, where the task is to find a suitable
                     partitioning of web snippets.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "44",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2018:GRS,
  author          = "Hao Wu and Yue Ning and Prithwish Chakraborty and
                     Jilles Vreeken and Nikolaj Tatti and Naren
                     Ramakrishnan",
  title           = "Generating Realistic Synthetic Population Datasets",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "45:1--45:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3182383",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Modern studies of societal phenomena rely on the
                     availability of large datasets capturing attributes and
                     activities of synthetic, city-level, populations. For
                     instance, in epidemiology, synthetic population
                     datasets are necessary to study disease propagation and
                     intervention measures before implementation. In social
                     science, synthetic population datasets are needed to
                     understand how policy decisions might affect
                     preferences and behaviors of individuals. In public
                     health, synthetic population datasets are necessary to
                     capture diagnostic and procedural characteristics of
                     patient records without violating confidentialities of
                     individuals. To generate such datasets over a large set
                     of categorical variables, we propose the use of the
                     maximum entropy principle to formalize a generative
                     model such that in a statistically well-founded way we
                     can optimally utilize given prior information about the
                     data, and are unbiased otherwise. An efficient
                     inference algorithm is designed to estimate the maximum
                     entropy model, and we demonstrate how our approach is
                     adept at estimating underlying data distributions. We
                     evaluate this approach against both simulated data and
                     US census datasets, and demonstrate its feasibility
                     using an epidemic simulation application.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "45",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{LaFond:2018:DSC,
  author          = "Timothy {La Fond} and Jennifer Neville and Brian
                     Gallagher",
  title           = "Designing Size Consistent Statistics for Accurate
                     Anomaly Detection in Dynamic Networks",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "46:1--46:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3185059",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "An important task in network analysis is the detection
                     of anomalous events in a network time series. These
                     events could merely be times of interest in the network
                     timeline or they could be examples of malicious
                     activity or network malfunction. Hypothesis testing
                     using network statistics to summarize the behavior of
                     the network provides a robust framework for the anomaly
                     detection decision process. Unfortunately, choosing
                     network statistics that are dependent on confounding
                     factors like the total number of nodes or edges can
                     lead to incorrect conclusions (e.g., false positives
                     and false negatives). In this article, we describe the
                     challenges that face anomaly detection in dynamic
                     network streams regarding confounding factors. We also
                     provide two solutions to avoiding error due to
                     confounding factors: the first is a randomization
                     testing method that controls for confounding factors,
                     and the second is a set of size-consistent network
                     statistics that avoid confounding due to the most
                     common factors, edge count and node count.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "46",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% Title: "Internet of Things" is brace-protected as one phrase so that
%%% sentence-casing styles do not lowercase "Things".  Abstract: "a Arduino"
%%% corrected to "an Arduino".  All other field values are unchanged.
@Article{Nesa:2018:IIG,
  author          = "Nashreen Nesa and Tania Ghosh and Indrajit Banerjee",
  title           = "{iGRM}: Improved Grey Relational Model and Its
                     Ensembles for Occupancy Sensing in {Internet of Things}
                     Applications",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "47:1--47:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3186268",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Occupancy detection is one of the many applications of
                     Building Automation Systems (BAS) or Heating,
                     Ventilation, and Air Conditioning (HVAC) control
                     systems, especially, with the rising demand of Internet
                     of Things (IoT) services. This article describes the
                     fusion of data collected from sensors by exploiting
                     their potential to sense occupancy in a room. For this
                     purpose, a sensor test bed is deployed that includes
                     four sensors measuring temperature, relative humidity,
                     distance from the first obstacle, and light along with
                     an Arduino micro-controller to validate our model. In
                     addition, this article proposes three algorithms for
                     efficient fusion of the sensor data that is inspired by
                     the Grey theory. An improved Grey Relational Model
                     (iGRM) is proposed, which acts as the base classifier
                     for the other two algorithms, namely, Grey Relational
                     Model with Bagging (iGRM-BG) and Grey Relational Model
                     with Boosting (iGRM-BT). Furthermore, all three
                     algorithms use a sliding window concept, where only the
                     samples inside the window participate in model
                     training. Also, we have considered varying number of
                     window size for optimal comparison. The algorithms were
                     tested against the experimental data collected through
                     a test bed as well as on a publicly available large
                     dataset, where both the ensemble models, iGRM-BG and
                     iGRM-BT, are seen to enhance the performance of iGRM.
                     The results reveal exceptionally high performances with
                     accuracies above 95\% (iGRM) and up to 100\% (iGRM-BT)
                     for the experimental dataset and above 98.24\% (iGRM)
                     and up to 99.49\% (iGRM-BG) using the publicly
                     available dataset. Among the three proposed models,
                     iGRM-BG was observed to outperform both iGRM and
                     iGRM-BT owing to its advantage of being an ensemble
                     model and its robustness against over-fitting.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "47",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% Abstract: the dangling citation marker "[2]" after "color coding (CC)"
%%% was dropped, since the cited reference list is not part of this record.
%%% All other field values are unchanged.
@Article{Bressan:2018:MCB,
  author          = "Marco Bressan and Flavio Chierichetti and Ravi Kumar
                     and Stefano Leucci and Alessandro Panconesi",
  title           = "{Motif} Counting Beyond Five Nodes",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "48:1--48:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3186586",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Counting graphlets is a well-studied problem in graph
                     mining and social network analysis. Recently, several
                     papers explored very simple and natural algorithms
                     based on Monte Carlo sampling of Markov Chains (MC),
                     and reported encouraging results. We show, perhaps
                     surprisingly, that such algorithms are outperformed by
                     color coding (CC), a sophisticated algorithmic
                     technique that we extend to the case of graphlet
                     sampling and for which we prove strong statistical
                     guarantees. Our computational experiments on graphs
                     with millions of nodes show CC to be more accurate than
                     MC; furthermore, we formally show that the mixing time
                     of the MC approach is too high in general, even when
                     the input graph has high conductance. All this comes at
                     a price however. While MC is very efficient in terms of
                     space, CC's memory requirements become demanding when
                     the size of the input graph and that of the graphlets
                     grow. And yet, our experiments show that CC can push
                     the limits of the state-of-the-art, both in terms of
                     the size of the input graph and of that of the
                     graphlets.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "48",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Nguyen:2018:EUP,
  author          = "Minh-Tien Nguyen and Duc-Vu Tran and Le-Minh Nguyen
                     and Xuan-Hieu Phan",
  title           = "Exploiting User Posts for {Web} Document
                     Summarization",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "49:1--49:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3186566",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Relevant user posts such as comments or tweets of a
                     Web document provide additional valuable information to
                     enrich the content of this document. When creating user
                     posts, readers tend to borrow salient words or phrases
                     in sentences. This can be considered as word variation.
                     This article proposes a framework that models the word
                     variation aspect to enhance the quality of Web document
                     summarization. Technically, the framework consists of
                     two steps: scoring and selection. In the first step,
                     the social information of a Web document such as user
                     posts is exploited to model intra-relations and
                     inter-relations in lexical and semantic levels. These
                     relations are denoted by a mutual reinforcement
                     similarity graph used to score each sentence and user
                     post. After scoring, summaries are extracted by using a
                     ranking approach or concept-based method formulated in
                     the form of Integer Linear Programming. To confirm the
                     efficiency of our framework, sentence and story
                     highlight extraction tasks were taken as a case study
                     on three datasets in two languages, English and
                     Vietnamese. Experimental results show that: (i) the
                     framework can improve ROUGE-scores compared to
                     state-of-the-art baselines of social context
                     summarization and (ii) the combination of the two
                     relations benefits the sentence extraction of single
                     Web documents.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "49",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2018:ERC,
  author          = "Bo Li and Yevgeniy Vorobeychik",
  title           = "Evasion-Robust Classification on Binary Domains",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "50:1--50:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3186282",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "The success of classification learning has led to
                     numerous attempts to apply it in adversarial settings
                     such as spam and malware detection. The core challenge
                     in this class of applications is that adversaries are
                     not static, but make a deliberate effort to evade the
                     classifiers. We investigate both the problem of
                     modeling the objectives of such adversaries, as well as
                     the algorithmic problem of accounting for rational,
                     objective-driven adversaries. We first present a
                     general approach based on mixed-integer linear
                     programming (MILP) with constraint generation. This
                     approach is the first to compute an optimal solution to
                     adversarial loss minimization for two general classes
                     of adversarial evasion models in the context of binary
                     feature spaces. To further improve scalability and
                     significantly generalize the scope of the MILP-based
                     method, we propose a principled iterative retraining
                     framework, which can be used with arbitrary classifiers
                     and essentially arbitrary attack models. We show that
                     the retraining approach, when it converges, minimizes
                     an upper bound on adversarial loss. Extensive
                     experiments demonstrate that the mixed-integer
                     programming approach significantly outperforms several
                     state-of-the-art adversarial learning alternatives.
                     Moreover, the retraining framework performs nearly as
                     well, but scales significantly better. Finally, we show
                     that our approach is robust to misspecifications of the
                     adversarial model.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "50",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Mohammadi:2018:COA,
  author          = "Majid Mohammadi and Amir Ahooye Atashin and Wout
                     Hofman and Yaohua Tan",
  title           = "Comparison of Ontology Alignment Systems Across Single
                     Matching Task Via the {McNemar's} Test",
  journal         = j-TKDD,
  volume          = "12",
  number          = "4",
  pages           = "51:1--51:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3193573",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Ontology alignment is widely used to find the
                     correspondences between different ontologies in diverse
                     fields. After discovering the alignments, several
                     performance scores are available to evaluate them. The
                     scores typically require the identified alignment and a
                     reference containing the underlying actual
                     correspondences of the given ontologies. The current
                     trend in the alignment evaluation is to put forward a
                     new score (e.g., precision, weighted precision,
                     semantic precision, etc.) and to compare various
                     alignments by juxtaposing the obtained scores. However,
                     it is substantially provocative to select one measure
                     among others for comparison. On top of that, claiming
                     if one system has a better performance than one another
                     cannot be substantiated solely by comparing two
                     scalars. In this article, we propose the statistical
                     procedures that enable us to theoretically favor one
                     system over one another. The McNemar's test is the
                     statistical means by which the comparison of two
                     ontology alignment systems over one matching task is
                     drawn. The test applies to a 2 $ \times $ 2 contingency
                     table, which can be constructed in two different ways
                     based on the alignments, each of which has their own
                     merits/pitfalls. The ways of the contingency table
                     construction and various apposite statistics from the
                     McNemar's test are elaborated in minute detail. In the
                     case of having more than two alignment systems for
                     comparison, the family wise error rate is expected to
                     happen. Thus, the ways of preventing such an error are
                     also discussed. A directed graph visualizes the outcome
                     of the McNemar's test in the presence of multiple
                     alignment systems. From this graph, it is readily
                     understood if one system is better than one another or
                     if their differences are imperceptible. The proposed
                     statistical methodologies are applied to the systems
                     participated in the OAEI 2016 anatomy track, and also
                     compares several well-known similarity metrics for the
                     same matching problem.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "51",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Lines:2018:TSC,
  author          = "Jason Lines and Sarah Taylor and Anthony Bagnall",
  title           = "Time Series Classification with {HIVE}-{COTE}: The
                     Hierarchical Vote Collective of Transformation-Based
                     Ensembles",
  journal         = j-TKDD,
  volume          = "12",
  number          = "5",
  pages           = "52:1--52:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3182382",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "A recent experimental evaluation assessed 19 time
                     series classification (TSC) algorithms and found that
                     one was significantly more accurate than all others:
                     the Flat Collective of Transformation-based Ensembles
                     (Flat-COTE). Flat-COTE is an ensemble that combines 35
                     classifiers over four data representations. However,
                     while comprehensive, the evaluation did not consider
                     deep learning approaches. Convolutional neural networks
                     (CNN) have seen a surge in popularity and are now state
                     of the art in many fields and raises the question of
                     whether CNNs could be equally transformative for TSC.
                     We implement a benchmark CNN for TSC using a common
                     structure and use results from a TSC-specific CNN from
                     the literature. We compare both to Flat-COTE and find
                     that the collective is significantly more accurate than
                     both CNNs. These results are impressive, but Flat-COTE
                     is not without deficiencies. We significantly improve
                     the collective by proposing a new hierarchical
                     structure with probabilistic voting, defining and
                     including two novel ensemble classifiers built in
                     existing feature spaces, and adding further modules to
                     represent two additional transformation domains. The
                     resulting classifier, the Hierarchical Vote Collective
                     of Transformation-based Ensembles (HIVE-COTE),
                     encapsulates classifiers built on five data
                     representations. We demonstrate that HIVE-COTE is
                     significantly more accurate than Flat-COTE (and all
                     other TSC algorithms that we are aware of) over 100
                     resamples of 85 TSC problems and is the new state of
                     the art for TSC. Further analysis is included through
                     the introduction and evaluation of 3 new case studies
                     and extensive experimentation on 1,000 simulated
                     datasets of 5 different types.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "52",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Amornbunchornvej:2018:CED,
  author          = "Chainarong Amornbunchornvej and Ivan Brugere and
                     Ariana Strandburg-Peshkin and Damien R. Farine and
                     Margaret C. Crofoot and Tanya Y. Berger-Wolf",
  title           = "Coordination Event Detection and Initiator
                     Identification in Time Series Data",
  journal         = j-TKDD,
  volume          = "12",
  number          = "5",
  pages           = "53:1--53:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3201406",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "Behavior initiation is a form of leadership and is an
                     important aspect of social organization that affects
                     the processes of group formation, dynamics, and
                     decision-making in human societies and other social
                     animal species. In this work, we formalize the
                     Coordination Initiator Inference Problem and propose a
                     simple yet powerful framework for extracting periods of
                     coordinated activity and determining individuals who
                     initiated this coordination, based solely on the
                     activity of individuals within a group during those
                     periods. The proposed approach, given arbitrary
                     individual time series, automatically (1) identifies
                     times of coordinated group activity, (2) determines the
                     identities of initiators of those activities, and (3)
                     classifies the likely mechanism by which the group
                     coordination occurred, all of which are novel
                     computational tasks. We demonstrate our framework on
                     both simulated and real-world data: trajectories
                     tracking of animals as well as stock market data. Our
                     method is competitive with existing global leadership
                     inference methods but provides the first approaches for
                     local leadership and coordination mechanism
                     classification. Our results are consistent with
                     ground-truthed biological data and the framework finds
                     many known events in financial data which are not
                     otherwise reflected in the aggregate NASDAQ index. Our
                     method is easily generalizable to any coordinated time
                     series data from interacting entities.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "53",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% Abstract: the dangling footnote marker "$^1$" after "sparse IE." was
%%% dropped, since the footnote text is not part of this record.  All other
%%% field values are unchanged.
@Article{Li:2018:ESC,
  author          = "Peipei Li and Haixun Wang and Hongsong Li and Xindong
                     Wu",
  title           = "Employing Semantic Context for Sparse Information
                     Extraction Assessment",
  journal         = j-TKDD,
  volume          = "12",
  number          = "5",
  pages           = "54:1--54:??",
  month           = jul,
  year            = "2018",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3201407",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Tue Jan 29 17:18:46 MST 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract        = "A huge amount of texts available on the World Wide Web
                     presents an unprecedented opportunity for information
                     extraction (IE). One important assumption in IE is that
                     frequent extractions are more likely to be correct.
                     Sparse IE is hence a challenging task because no matter
                     how big a corpus is, there are extractions supported by
                     only a small amount of evidence in the corpus. However,
                     there is limited research on sparse IE, especially in
                     the assessment of the validity of sparse IEs. Motivated
                     by this, we introduce a lightweight, explicit semantic
                     approach for assessing sparse IE. We first use a
                     large semantic network consisting of millions of
                     concepts, entities, and attributes to explicitly model
                     the context of any semantic relationship. Second, we
                     learn from three semantic contexts using different base
                     classifiers to select an optimal classification model
                     for assessing sparse extractions. Finally, experiments
                     show that as compared with several state-of-the-art
                     approaches, our approach can significantly improve the
                     $F$-score in the assessment of sparse extractions while
                     maintaining the efficiency.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "54",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Bollegala:2018:CPM,
author = "Danushka Bollegala and Vincent Atanasov and Takanori
Maehara and Ken-Ichi Kawarabayashi",
title = "{ClassiNet} --- Predicting Missing Features for
Short-Text Classification",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "55:1--55:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3201578",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Short and sparse texts such as tweets, search engine
snippets, product reviews, and chat messages are
abundant on the Web. Classifying such short-texts into
a pre-defined set of categories is a common problem
that arises in various contexts, such as sentiment
classification, spam detection, and information
recommendation. The fundamental problem in short-text
classification is feature sparseness --- the lack of
feature overlap between a trained model and a test
instance to be classified. We propose ClassiNet --- a
network of classifiers trained for predicting missing
features in a given instance, to overcome the feature
sparseness problem. Using a set of unlabeled training
instances, we first learn binary classifiers as feature
predictors for predicting whether a particular feature
occurs in a given instance. Next, each feature
predictor is represented as a vertex $ v_i $ in the
ClassiNet, where a one-to-one correspondence exists
between feature predictors and vertices. The weight of
the directed edge $ e_{ij} $ connecting a vertex $ v_i $
to a vertex $ v_j $ represents the conditional
probability that given $ v_i $ exists in an instance,
$ v_j $ also
exists in the same instance. We show that ClassiNets
generalize word co-occurrence graphs by considering
implicit co-occurrences between features. We extract
numerous features from the trained ClassiNet to
overcome feature sparseness. In particular, for a given
instance x, we find similar features from ClassiNet
that did not appear in x, and append those features in
the representation of x. Moreover, we propose a method
based on graph propagation to find features that are
indirectly related to a given short-text. We evaluate
ClassiNets on several benchmark datasets for short-text
classification. Our experimental results show that by
using ClassiNet, we can statistically significantly
improve the accuracy in short-text classification
tasks, without having to use any external resources
such as thesauri for finding related features.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "55",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Qin:2018:STR,
author = "Tian Qin and Wufan Shangguan and Guojie Song and Jie
Tang",
title = "Spatio-Temporal Routine Mining on Mobile Phone Data",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "56:1--56:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3201577",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Mining human behaviors has always been an important
subarea of Data Mining. While it provides empirical
evidences to psychological/behavioral studies, it also
builds the foundation of various big-data systems,
which rely heavily on the prediction of human
behaviors. In recent years, the ubiquitous spreading of
mobile phones and the massive amount of spatio-temporal
data collected from them make it possible to keep track
of the daily commute behaviors of mobile subscribers
and further conduct routine mining on them. In this
article, we propose to model mobile subscribers' daily
commute behaviors by three levels: location trajectory,
one-day pattern, and routine pattern. We develop the
model Spatio-Temporal Routine Mining Model (STRMM) to
characterize the generative process between these three
levels. From daily trajectories, the STRMM model
unsupervisedly extracts spatio-temporal routine
patterns that contain two aspects of information: (1)
How people's typical commute patterns are. (2) How much
their commute behaviors vary from day to day. Compared
to traditional methods, STRMM takes into account the
different degrees of behavioral uncertainty in
different timespans of a day, yielding more realistic
and intuitive results. To learn model parameters, we
adopt Stochastic Expectation Maximization algorithm.
Experiments are conducted on two real world datasets,
and the empirical results show that the STRMM model can
effectively discover hidden routine patterns of human
commute behaviors and yields higher accuracy results in
trajectory prediction task.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "56",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2018:SRI,
author = "Ziqi Zhang and Jie Gao and Fabio Ciravegna",
title = "{SemRe-Rank}: Improving Automatic Term Extraction by
Incorporating Semantic Relatedness with Personalised
{PageRank}",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "57:1--57:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3201408",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Automatic Term Extraction (ATE) deals with the
extraction of terminology from a domain specific
corpus, and has long been an established research area
in data and knowledge acquisition. ATE remains a
challenging task as it is known that there is no
existing ATE methods that can consistently outperform
others in any domain. This work adopts a refreshed
perspective to this problem: instead of searching for
such a `one-size-fit-all' solution that may never
exist, we propose to develop generic methods to
`enhance' existing ATE methods. We introduce
SemRe-Rank, the first method based on this principle,
to incorporate semantic relatedness --- an often
overlooked venue --- into an existing ATE method to
further improve
its performance. SemRe-Rank incorporates word
embeddings into a personalised PageRank process to
compute `semantic importance' scores for candidate
terms from a graph of semantically related words
(nodes), which are then used to revise the scores of
candidate terms computed by a base ATE algorithm.
Extensively evaluated with 13 state-of-the-art base ATE
methods on four datasets of diverse nature, it is shown
to have achieved widespread improvement over all base
methods and across all datasets, with up to 15
percentage points when measured by the Precision in the
top ranked K candidate terms (the average for a set of
K's), or up to 28 percentage points in F1 measured at
a K that equals to the expected real terms in the
candidates (F1 in short). Compared to an alternative
approach built on the well-known TextRank algorithm,
SemRe-Rank can potentially outperform by up to 8 points
in Precision at top K, or up to 17 points in F1.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "57",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hao:2018:OAL,
author = "Shuji Hao and Peiying Hu and Peilin Zhao and Steven C.
H. Hoi and Chunyan Miao",
title = "Online Active Learning with Expert Advice",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "58:1--58:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3201604",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In literature, learning with expert advice methods
usually assume that a learner always obtain the true
label of every incoming training instance at the end of
each trial. However, in many real-world applications,
acquiring the true labels of all instances can be both
costly and time consuming, especially for large-scale
problems. For example, in the social media, data stream
usually comes in a high speed and volume, and it is
nearly impossible and highly costly to label all of the
instances. In this article, we address this problem
with active learning with expert advice, where the
ground truth of an instance is disclosed only when it
is requested by the proposed active query strategies.
Our goal is to minimize the number of requests while
training an online learning model without sacrificing
the performance. To address this challenge, we propose
a framework of active forecasters, which attempts to
extend two fully supervised forecasters, Exponentially
Weighted Average Forecaster and Greedy Forecaster, to
tackle the task of online active learning (OAL) with
expert advice. Specifically, we proposed two OAL with
expert advice algorithms, named Active Exponentially
Weighted Average Forecaster (AEWAF) and active greedy
forecaster (AGF), by considering the difference of
expert advices. To further improve the robustness of
the proposed AEWAF and AGF algorithms in the noisy
scenarios (where noisy experts exist), we also proposed
two robust active learning with expert advice
algorithms, named Robust Active Exponentially Weighted
Average Forecaster and Robust Active Greedy Forecaster.
We validate the efficacy of the proposed algorithms by
an extensive set of experiments in both normal
scenarios (where all of experts are comparably
reliable) and noisy scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "58",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Silva:2018:DMA,
author = "Fabr{\'\i}cio A. Silva and Augusto C. S. A. Domingues
and Thais R. M. Braga Silva",
title = "Discovering Mobile Application Usage Patterns from a
Large-Scale Dataset",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "59:1--59:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3209669",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The discovering of patterns regarding how, when, and
where users interact with mobile applications reveals
important insights for mobile service providers. In
this work, we exploit for the first time a real and
large-scale dataset representing the records of mobile
application usage of 5,342 users during 2014. The data
was collected by a software agent, installed at the
users' smartphones, which monitors detailed usage of
applications. First, we look for general patterns of
how users access some of the most popular mobile
applications in terms of frequency, duration,
diversity, and data traffic. Next, we mine the dataset
looking for temporal patterns in terms of when and how
often accesses occur. Finally, we exploit the location
of each access to detect users' points of interest and
location-based communities. Based on the results, we
derive a model to generate synthetic datasets of mobile
application usage and evaluate solutions to predict the
next application to be launched. We also discuss a
series of implications of the findings regarding
telecommunication services, mobile advertisements, and
smart cities. This is the first time this dataset is
used, and we also make it publicly available for other
researchers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "59",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2018:CQE,
author = "Feijiang Li and Yuhua Qian and Jieting Wang and
Chuangyin Dang and Bing Liu",
title = "{Cluster}'s Quality Evaluation and Selective
Clustering Ensemble",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "60:1--60:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3211872",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Clustering ensemble has drawn much attention in recent
years due to its ability to generate a high quality and
robust partition result. Weighted clustering ensemble
and selective clustering ensemble are two general ways
to further improve the performance of a clustering
ensemble method. Existing weighted clustering ensemble
methods assign the same weight to each cluster in a
partition of the ensemble. Since the qualities of the
clusters in a partition are different, the clusters
should be weighted differently. To address this issue,
this article proposes a new measure to calculate the
similarity between a cluster and a partition.
Theoretically, this measure is effective in handling
two problems in measuring the quality of a cluster,
which are defined as the symmetric problem and the
context meaning problem. In addition, some properties
of the proposed measure are analyzed. This measure can
be easily expanded to a clustering performance measure
that calculates the similarity between two partitions.
As a result of this measure, we propose a novel
selective clustering ensemble framework, which
considers the differences between the objective of the
ensemble selection stage and the object of the ensemble
integration stage in the selective clustering ensemble.
To verify the performance of the new measure, we
compare the performance of the measure with the two
existing measures in weighting clusters. The
experiments show that the proposed measure is more
effective. To verify the performance of the novel
framework, four existing state-of-the-art selective
clustering ensemble frameworks are employed as
references. The experiments show that the proposed
framework is statistically better than the others on 17
UCI benchmark datasets, 8 document datasets, and the
Olivetti Face Database.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "60",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Riondato:2018:AAB,
author = "Matteo Riondato and Eli Upfal",
title = "{ABRA}: Approximating Betweenness Centrality in Static
and Dynamic Graphs with {Rademacher} Averages",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "61:1--61:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3208351",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "ABPA$ \Xi $A$ \Sigma $ (ABRAXAS): Gnostic word of
mystic meaning. We present ABRA, a suite of algorithms
to compute and maintain probabilistically guaranteed
high-quality approximations of the betweenness
centrality of all nodes (or edges) on both static and
fully dynamic graphs. Our algorithms use progressive
random sampling and their analysis rely on Rademacher
averages and pseudodimension, fundamental concepts from
statistical learning theory. To our knowledge, ABRA is
the first application of these concepts to the field of
graph analysis. Our experimental results show that ABRA
is much faster than exact methods, and vastly
outperforms, in both runtime, number of samples, and
accuracy, state-of-the-art algorithms with the same
quality guarantees.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "61",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{DosSantos:2018:RLC,
author = "Ludovic {Dos Santos} and Benjamin Piwowarski and
Ludovic Denoyer and Patrick Gallinari",
title = "Representation Learning for Classification in
Heterogeneous Graphs with Application to Social
Networks",
journal = j-TKDD,
volume = "12",
number = "5",
pages = "62:1--62:??",
month = jul,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3201603",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:46 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "We address the task of node classification in
heterogeneous networks, where the nodes are of
different types, each type having its own set of
labels, and the relations between nodes may also be of
different types. A typical example is provided by
social networks where node types may for example be
users, content, or films, and relations friendship,
like, authorship. Learning and performing inference on
such heterogeneous networks is a recent task requiring
new models and algorithms. We propose a model, Labeling
Heterogeneous Network (LaHNet), a transductive approach
to classification that learns to project the different
types of nodes into a common latent space. This
embedding is learned so as to reflect different
characteristics of the problem such as the correlation
between node labels, as well as the graph topology. The
application focus is on social graphs, but the
algorithm is general and can be used for other domains.
The model is evaluated on five datasets representative
of different instances of social data.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "62",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2018:CCE,
author = "Can Wang and Chi-Hung Chi and Zhong She and Longbing
Cao and Bela Stantic",
title = "Coupled Clustering Ensemble by Exploring Data
Interdependence",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "63:1--63:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3230967",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Clustering ensembles combine multiple partitions of
data into a single clustering solution. It is an
effective technique for improving the quality of
clustering results. Current clustering ensemble
algorithms are usually built on the pairwise agreements
between clusterings that focus on the similarity via
consensus functions, between data objects that induce
similarity measures from partitions and re-cluster
objects, and between clusters that collapse groups of
clusters into meta-clusters. In most of those models,
there is a strong assumption on IIDness (i.e.,
independent and identical distribution), which states
that base clusterings perform independently of one
another and all objects are also independent. In the
real world, however, objects are generally likely
related to each other through features that are either
explicit or even implicit. There is also latent but
definite relationship among intermediate base
clusterings because they are derived from the same set
of data. All these demand a further investigation of
clustering ensembles that explores the interdependence
characteristics of data. To solve this problem, a new
coupled clustering ensemble (CCE) framework that
works on the interdependence nature of objects and
intermediate base clusterings is proposed in this
article. The main idea is to model the coupling
relationship between objects by aggregating the
similarity of base clusterings, and the interactive
relationship among objects by addressing their
neighborhood domains. Once these interdependence
relationships are discovered, they will act as critical
supplements to clustering ensembles. We verified our
proposed framework by using three types of consensus
function: clustering-based, object-based, and
cluster-based. Substantial experiments on multiple
synthetic and real-life benchmark datasets indicate
that CCE can effectively capture the implicit
interdependence relationships among base clusterings
and among objects with higher clustering accuracy,
stability, and robustness compared to 14
state-of-the-art techniques, supported by statistical
analysis. In addition, we show that the final
clustering quality is dependent on the data
characteristics (e.g., quality and consistency) of base
clusterings in terms of sensitivity analysis. Finally,
the applications in document clustering, as well as on
the datasets with much larger size and dimensionality,
further demonstrate the effectiveness, efficiency, and
scalability of our proposed models.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "63",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2018:EBQ,
author = "Zhipeng Huang and Bogdan Cautis and Reynold Cheng and
Yudian Zheng and Nikos Mamoulis and Jing Yan",
title = "Entity-Based Query Recommendation for Long-Tail
Queries",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "64:1--64:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3233186",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Query recommendation, which suggests related queries
to search engine users, has attracted a lot of
attention in recent years. Most of the existing
solutions, which perform analysis of users' search
history (or query logs), are often insufficient for
long-tail queries that rarely appear in query logs. To
handle such queries, we study the use of entities found
in queries to provide recommendations. Specifically, we
extract entities from a query, and use these entities
to explore new ones by consulting an information
source. The discovered entities are then used to
suggest new queries to the user. In this article, we
examine two information sources: (1) a knowledge base
(or KB), such as YAGO and Freebase; and (2) a click
log, which contains the URLs accessed by a query user.
We study how to use these sources to find new entities
useful for query recommendation. We further study a
hybrid framework that integrates different query
recommendation methods effectively. As shown in the
experiments, our proposed approaches provide better
recommendations than existing solutions for long-tail
queries. In addition, our query recommendation process
takes less than 100 ms to complete. Thus, our solution
is suitable for providing online query recommendation
services for search engines.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "64",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2018:MAD,
author = "Xiaoli Liu and Peng Cao and Andr{\'e} R.
Gon{\c{c}}alves and Dazhe Zhao and Arindam Banerjee",
title = "Modeling {Alzheimer}'s Disease Progression with Fused
{Laplacian} Sparse Group Lasso",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "65:1--65:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3230668",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Alzheimer's disease (AD), the most common type of
dementia, not only imposes a huge financial burden on
the health care system, but also a psychological and
emotional burden on patients and their families. There
is thus an urgent need to infer trajectories of
cognitive performance over time and identify biomarkers
predictive of the progression. In this article, we
propose the multi-task learning with fused Laplacian
sparse group lasso model, which can identify biomarkers
closely related to cognitive measures due to its
sparsity-inducing property, and model the disease
progression with a general weighted (undirected)
dependency graphs among the tasks. An efficient
alternative directions method of multipliers based
optimization algorithm is derived to solve the proposed
non-smooth objective formulation. The effectiveness of
the proposed model is demonstrated by its superior
prediction performance over multiple state-of-the-art
methods and accurate identification of compact sets of
cognition-relevant imaging biomarkers that are
consistent with prior medical studies.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "65",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{He:2018:SRI,
author = "Xinran He and David Kempe",
title = "Stability and Robustness in Influence Maximization",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "66:1--66:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3233227",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In the well-studied Influence Maximization problem,
the goal is to identify a set of k nodes in a social
network whose joint influence on the network is
maximized. A large body of recent work has justified
research on Influence Maximization models and
algorithms with their potential to create societal or
economic value. However, in order to live up to this
potential, the algorithms must be robust to large
amounts of noise, for they require quantitative
estimates of the influence, which individuals exert on
each other; ground truth for such quantities is
inaccessible, and even decent estimates are very
difficult to obtain. We begin to address this concern
formally. First, we exhibit simple inputs on which even
very small estimation errors may mislead every
algorithm into highly suboptimal solutions. Motivated
by this observation, we propose the Perturbation
Interval model as a framework to characterize the
stability of Influence Maximization against noise in
the inferred diffusion network. Analyzing the
susceptibility of specific instances to estimation
errors leads to a clean algorithmic question, which we
term the Influence Difference Maximization problem.
However, the objective function of Influence Difference
Maximization is NP-hard to approximate within a factor
of $ O(n^{(1 - \epsilon)}) $ for any $ \epsilon > 0 $.
Given the infeasibility of diagnosing instability
algorithmically, we focus on finding influential users
robustly across multiple diffusion settings. We define
a Robust Influence Maximization framework wherein an
algorithm is presented with a set of influence
functions. The algorithm's goal is to identify a set of
k nodes who are simultaneously influential for all
influence functions, compared to the
(function-specific) optimum solutions. We show strong
approximation hardness results for this problem unless
the algorithm gets to select at least a logarithmic
factor more seeds than the optimum solution. However,
when enough extra seeds may be selected, we show that
techniques of Krause et al. can be used to approximate
the optimum robust influence to within a factor of $ 1
- 1 / e $. We evaluate this bicriteria approximation
algorithm against natural heuristics on several
real-world datasets. Our experiments indicate that the
worst-case hardness does not necessarily translate into
bad performance on real-world datasets; all algorithms
perform fairly well.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "66",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Romero-Tris:2018:PPT,
author = "Cristina Romero-Tris and David Meg{\'\i}as",
title = "Protecting Privacy in Trajectories with a User-Centric
Approach",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "67:1--67:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3233185",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "The increased use of location-aware devices, such as
smartphones, generates a large amount of trajectory
data. These data can be useful in several domains, like
marketing, path modeling, localization of an epidemic
focus, and so on. Nevertheless, since trajectory
information contains personal mobility data, improper
use or publication of trajectory data can threaten
users' privacy. It may reveal sensitive details like
habits of behavior, religious beliefs, and sexual
preferences. Therefore, many users might be unwilling
to share their trajectory data without a previous
anonymization process. Currently, several proposals to
address this problem can be found in the literature.
These solutions focus on anonymizing data before its
publication, i.e., when they are already stored in the
server database. Nevertheless, we argue that this
approach gives the user no control about the
information she shares. For this reason, we propose
anonymizing data in the users' mobile devices, before
they are sent to a third party. This article extends
our previous work which was, to the best of our
knowledge, the first one to anonymize data at the
client side, allowing users to select the amount and
accuracy of shared data. In this article, we describe
an improved version of the protocol, and we include the
implementation together with an analysis of the results
obtained after the simulation with real trajectory
data.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "67",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ying:2018:FIG,
author = "Josh Jia-Ching Ying and Ji Zhang and Che-Wei Huang and
Kuan-Ta Chen and Vincent S. Tseng",
title = "{FrauDetector+}: an Incremental Graph-Mining Approach
for Efficient Fraudulent Phone Call Detection",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "68:1--68:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3234943",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "In recent years, telecommunication fraud has become
more rampant internationally with the development of
modern technology and global communication. Because of
rapid growth in the volume of call logs, the task of
fraudulent phone call detection is confronted with big
data issues in real-world implementations. Although our
previous work, FrauDetector, addressed this problem and
achieved some promising results, it can be further
enhanced because it focuses only on fraud detection
accuracy, whereas the efficiency and scalability are
not top priorities. Other known approaches for
fraudulent call number detection suffer from long
training times or cannot accurately detect fraudulent
phone calls in real time. However, the learning process
of FrauDetector is too time-consuming to support
real-world application. Although we have attempted to
accelerate the learning process of FrauDetector by
parallelization, the parallelized learning process,
namely PFrauDetector, still cannot afford the computing
cost. In this article, we propose a highly efficient
incremental graph-mining-based fraudulent phone call
detection approach, namely FrauDetector$^+$, which can
automatically label fraudulent phone numbers with a
``fraud'' tag --- a crucial prerequisite for distinguishing
fraudulent phone call numbers from nonfraudulent ones.
FrauDetector$^+$ initially generates smaller, more
manageable subnetworks from original graph and performs
a parallelized weighted HITS algorithm for a
significant speed increase in the graph learning
module. It adopts a novel aggregation approach to
generate a trust (or experience) value for each phone
number (or user) based on their respective local
values. After the initial procedure, we can
incrementally update the trust (or experience) value
for each phone number (or user) while a new fraud phone
number is identified. An efficient fraud-centric hash
structure is constructed to support fast real-time
detection of fraudulent phone numbers in the detection
module. We conduct a comprehensive experimental study
based on real datasets collected through an antifraud
mobile application called Whoscall. The results
demonstrate a significantly improved efficiency of our
approach compared with FrauDetector as well as superior
performance against other major classifier-based
methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "68",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Di:2018:LSA,
author = "Mingyang Di and Diego Klabjan and Long Sha and Patrick
Lucey",
title = "Large-Scale Adversarial Sports Play Retrieval with
Learning to Rank",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "69:1--69:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3230667",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "As teams of professional leagues are becoming more and
more analytically driven, the interest in effective
data management and access of sports plays has
dramatically increased. In this article, we present a
retrieval system that can quickly find the most
relevant plays from historical games given an input
query. To search through a large number of games at an
interactive speed, our system is built upon a
distributed framework so that each query-result pair is
evaluated in parallel. We also propose a pairwise
learning to rank approach to improve search ranking
based on users' clickthrough behavior. The similarity
metric in training the rank function is based on
automatically learnt features from a convolutional
autoencoder. Finally, we showcase the efficacy of our
learning to rank approach by demonstrating rank quality
in a user study.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "69",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2018:GEF,
author = "Xiao Huang and Jundong Li and Na Zou and Xia Hu",
title = "A General Embedding Framework for Heterogeneous
Information Learning in Large-Scale Networks",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "70:1--70:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3241063",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Network analysis has been widely applied in many
real-world tasks, such as gene analysis and targeted
marketing. To extract effective features for these
analysis tasks, network embedding automatically learns
a low-dimensional vector representation for each node,
such that the meaningful topological proximity is well
preserved. While the embedding algorithms on pure
topological structure have attracted considerable
attention, in practice, nodes are often abundantly
accompanied with other types of meaningful information,
such as node attributes, second-order proximity, and
link directionality. A general framework for
incorporating the heterogeneous information into
network embedding could be potentially helpful in
learning better vector representations. However, it
remains a challenging task to jointly embed the
geometrical structure and a distinct type of
information due to the heterogeneity. In addition, the
real-world networks often contain a large number of
nodes, which put demands on the scalability of the
embedding algorithms. To bridge the gap, in this
article, we propose a general embedding framework named
Heterogeneous Information Learning in Large-scale
networks (HILL) to accelerate the joint learning. It
enables the simultaneous node proximity assessing
process to be done in a distributed manner by
decomposing the complex modeling and optimization into
many simple and independent sub-problems. We validate
the significant correlation between the heterogeneous
information and topological structure, and illustrate
the generalizability of HILL by applying it to perform
attributed network embedding and second-order proximity
learning. A variation is proposed for link
directionality modeling. Experimental results on
real-world networks demonstrate the effectiveness and
efficiency of HILL.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "70",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Park:2018:ETS,
author = "Ha-Myung Park and Francesco Silvestri and Rasmus Pagh
and Chin-Wan Chung and Sung-Hyon Myaeng and U. Kang",
title = "Enumerating Trillion Subgraphs On Distributed
Systems",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "71:1--71:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3237191",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "How can we find patterns from an enormous graph with
billions of vertices and edges? The subgraph
enumeration, which is to find patterns from a graph, is
an important task for graph data analysis with many
applications, including analyzing the social network
evolution, measuring the significance of motifs in
biological networks, observing the dynamics of
Internet, and so on. Especially, the triangle
enumeration, a special case of the subgraph
enumeration, where the pattern is a triangle, has many
applications such as identifying suspicious users in
social networks, detecting web spams, and finding
communities. However, recent networks are so large that
most of the previous algorithms fail to process them.
Recently, several MapReduce algorithms have been
proposed to address such large networks; however, they
suffer from the massive shuffled data resulting in a
very long processing time. In this article, we propose
scalable methods for enumerating trillion subgraphs on
distributed systems. We first propose PTE
(Pre-partitioned Triangle Enumeration), a new
distributed algorithm for enumerating triangles in
enormous graphs by resolving the structural
inefficiency of the previous MapReduce algorithms. PTE
enumerates trillions of triangles in a billion scale
graph by decreasing three factors: the amount of
shuffled data, total work, and network read. We also
propose PSE (Pre-partitioned Subgraph Enumeration), a
generalized version of PTE for enumerating subgraphs
that match an arbitrary query graph. Experimental
results show that PTE provides 79 times faster
performance than recent distributed algorithms on
real-world graphs, and succeeds in enumerating more
than 3 trillion triangles on the ClueWeb12 graph with
6.3 billion vertices and 72 billion edges. Furthermore,
PSE successfully enumerates 265 trillion clique
subgraphs with 4 vertices from a subdomain hyperlink
network, showing 47 times faster performance than the
state of the art distributed subgraph enumeration
algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "71",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wen:2018:EAD,
author = "Xidao Wen and Yu-Ru Lin and Konstantinos Pelechrinis",
title = "Event Analytics via Discriminant Tensor
Factorization",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "72:1--72:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3184455",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Analyzing the impact of disastrous events has been
central to understanding and responding to crises.
Traditionally, the assessment of disaster impact has
primarily relied on the manual collection and analysis
of surveys and questionnaires as well as the review of
authority reports. This can be costly and
time-consuming, whereas a timely assessment of an
event's impact is critical for crisis management and
humanitarian operations. In this work, we formulate the
impact discovery as the problem to identify the shared
and discriminative subspace via tensor factorization
due to the multi-dimensional nature of mobility data.
Existing work in mining the shared and discriminative
subspaces typically requires the predefined number of
either type of them. In the context of event impact
discovery, this could be impractical, especially for
those unprecedented events. To overcome this, we
propose a new framework, called ``PairFac,'' that
jointly factorizes the multi-dimensional data to
discover the latent mobility pattern along with its
associated discriminative weight. This framework does
not require splitting the shared and discriminative
subspaces in advance and at the same time automatically
captures the persistent and changing patterns from
multi-dimensional behavioral data. Our work has
important applications in crisis management and urban
planning, which provides a timely assessment of impacts
of major events in the urban environment.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "72",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2018:SSL,
author = "Chaochao Chen and Kevin Chen-Chuan Chang and Qibing Li
and Xiaolin Zheng",
title = "Semi-supervised Learning Meets Factorization: Learning
to Recommend with Chain Graph Model",
journal = j-TKDD,
volume = "12",
number = "6",
pages = "73:1--73:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3264745",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Recently, latent factor model (LFM) has been drawing
much attention in recommender systems due to its good
performance and scalability. However, existing LFMs
predict missing values in a user-item rating matrix
only based on the known ones, and thus the sparsity of
the rating matrix always limits their performance.
Meanwhile, semi-supervised learning (SSL) provides an
effective way to alleviate the label (i.e., rating)
sparsity problem by performing label propagation, which
is mainly based on the smoothness insight on affinity
graphs. However, graph-based SSL suffers serious
scalability and graph unreliable problems when directly
being applied to do recommendation. In this article, we
propose a novel probabilistic chain graph model (CGM)
to marry SSL with LFM. The proposed CGM is a
combination of Bayesian network and Markov random
field. The Bayesian network is used to model the rating
generation and regression procedures, and the Markov
random field is used to model the confidence-aware
smoothness constraint between the generated ratings.
Experimental results show that our proposed CGM
significantly outperforms the state-of-the-art
approaches in terms of four evaluation metrics, and
with a larger performance margin when data sparsity
increases.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "73",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Siddiqui:2019:SFE,
author = "Md Amran Siddiqui and Alan Fern and Thomas G.
Dietterich and Weng-Keen Wong",
title = "Sequential Feature Explanations for Anomaly
Detection",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3230666",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3230666",
abstract = "In many applications, an anomaly detection system
presents the most anomalous data instance to a human
analyst, who then must determine whether the instance
is truly of interest (e.g., a threat in a security
setting). Unfortunately, most anomaly detectors provide
no explanation about why an instance was considered
anomalous, leaving the analyst with no guidance about
where to begin the investigation. To address this
issue, we study the problems of computing and
evaluating sequential feature explanations (SFEs) for
anomaly detectors. An SFE of an anomaly is a sequence
of features, which are presented to the analyst one at
a time (in order) until the information contained in
the highlighted features is enough for the analyst to
make a confident judgement about the anomaly. Since
analyst effort is related to the amount of information
that they consider in an investigation, an
explanation's quality is related to the number of
features that must be revealed to attain confidence. In
this article, we first formulate the problem of
optimizing SFEs for a particular density-based anomaly
detector. We then present both greedy algorithms and an
optimal algorithm, based on branch-and-bound search,
for optimizing SFEs. Finally, we provide a large scale
quantitative evaluation of these algorithms using a
novel framework for evaluating explanations. The
results show that our algorithms are quite effective
and that our best greedy algorithm is competitive with
optimal solutions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2019:DDS,
author = "Xiaoming Liu and Chao Shen and Xiaohong Guan and
Yadong Zhou",
title = "Digger: Detect Similar Groups in Heterogeneous Social
Networks",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3267106",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3267106",
abstract = "People participate in multiple online social networks,
e.g., Facebook, Twitter, and Linkedin, and these social
networks with heterogeneous social content and user
relationship are named as heterogeneous social
networks. Group structure widely exists in
heterogeneous social networks, which reveals the
evolution of human cooperation. Detecting similar
groups in heterogeneous networks has a great
significance for many applications, such as
recommendation system and spammer detection, using the
wealth of group information. Although promising, this
novel problem encounters a variety of technical
challenges, including incomplete data, high time
complexity, and ground truth. To address the research
gap and technical challenges, we take advantage of a
ratio-cut optimization function to model this novel
problem by the linear mixed-effects method and graph
spectral theory. Based on this model, we propose an
efficient algorithm called Digger to detect the similar
groups in the large graphs. Digger consists of three
steps, including measuring user similarity, constructing a
matching graph, and detecting similar groups. We adopt
several strategies to lower the computational cost and
detail the basis of labeling the ground truth. We
evaluate the effectiveness and efficiency of our
algorithm on five different types of online social
networks. The extensive experiments show that our
method achieves 0.693, 0.783, and 0.735 in precision,
recall, and F1-measure, which significantly surpass the
state-of-arts by 24.4\%, 15.3\%, and 20.7\%,
respectively. The results demonstrate that our proposal
can detect similar groups in heterogeneous networks
effectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lagree:2019:AOI,
author = "Paul Lagr{\'e}e and Olivier Capp{\'e} and Bogdan
Cautis and Silviu Maniu",
title = "Algorithms for Online Influencer Marketing",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3274670",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3274670",
abstract = "Influence maximization is the problem of finding
influential users, or nodes, in a graph so as to
maximize the spread of information. It has many
applications in advertising and marketing on social
networks. In this article, we study a highly generic
version of influence maximization, one of optimizing
influence campaigns by sequentially selecting ``spread
seeds'' from a set of influencers, a small subset of
the node population, under the hypothesis that, in a
given campaign, previously activated nodes remain
persistently active. This problem is in particular
relevant for an important form of online marketing,
known as influencer marketing, in which the marketers
target a sub-population of influential people, instead
of the entire base of potential buyers. Importantly, we
make no assumptions on the underlying diffusion model,
and we work in a setting where neither a diffusion
network nor historical activation data are available.
We call this problem online influencer marketing with
persistence (in short, OIMP). We first discuss
motivating scenarios and present our general approach.
We introduce an estimator on the influencers' remaining
potential --- the expected number of nodes that can
still be reached from a given influencer --- and
justify its strength to rapidly estimate the desired
value, relying on real data gathered from Twitter. We
then describe a novel algorithm, GT-UCB, relying on
probabilistic upper confidence bounds on the remaining
potential. We show that our approach leads to
high-quality spreads on both simulated and real
datasets. Importantly, it is orders of magnitude faster
than state-of-the-art influence maximization methods,
making it possible to deal with large-scale online
scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tao:2019:RSE,
author = "Zhiqiang Tao and Hongfu Liu and Sheng Li and Zhengming
Ding and Yun Fu",
title = "Robust Spectral Ensemble Clustering via Rank
Minimization",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3278606",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3278606",
abstract = "Ensemble Clustering (EC) is an important topic for
data cluster analysis. It targets to integrate multiple
Basic Partitions (BPs) of a particular dataset into a
consensus partition. Among previous works, one
promising and effective way is to transform EC as a
graph partitioning problem on the co-association
matrix, which is a pair-wise similarity matrix
summarized by all the BPs in essence. However, most
existing EC methods directly utilize the co-association
matrix, yet without considering various noises (e.g.,
the disagreement between different BPs and the
outliers) that may exist in it. These noises can impair
the cluster structure of a co-association matrix, and
thus mislead the final graph partitioning process. To
address this challenge, we propose a novel Robust
Spectral Ensemble Clustering (RSEC) algorithm in this
article. Specifically, we learn low-rank representation
(LRR) for the co-association matrix to uncover its
cluster structure and handle the noises, and meanwhile,
we perform spectral clustering with the learned
representation to seek for a consensus partition. These
two steps are jointly proceeded within a unified
optimization framework. In particular, during the
optimizing process, we leverage consensus partition to
iteratively enhance the block-diagonal structure of
LRR, in order to assist the graph partitioning. To
solve RSEC, we first formulate it by using nuclear norm
as a convex proxy to the rank function. Then, motivated
by the recent advances in non-convex rank minimization,
we further develop a non-convex model for RSEC and
provide it a solution by the majorization--minimization
Augmented Lagrange Multiplier algorithm. Experiments on
18 real-world datasets demonstrate the effectiveness of
our algorithm compared with state-of-the-art methods.
Moreover, several impact factors on the clustering
performance of our approach are also explored
extensively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jaysawal:2019:PAP,
author = "Bijay Prasad Jaysawal and Jen-Wei Huang",
title = "{PSP}-{AMS}: Progressive Mining of Sequential Patterns
Across Multiple Streams",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3281632",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3281632",
abstract = "Sequential pattern mining is used to find frequent
data sequences over time. When sequential patterns are
generated, the newly arriving patterns may not be
identified as frequent sequential patterns due to the
existence of old data and sequences. Progressive
sequential pattern mining aims to find the most
up-to-date sequential patterns given that obsolete
items will be deleted from the sequences. When
sequences come with multiple data streams, it is
difficult to maintain and update the current sequential
patterns. Even worse, when we consider the sequences
across multiple streams, previous methods cannot
efficiently compute the frequent sequential patterns.
In this work, we propose an efficient algorithm PSP-AMS
to address this problem. PSP-AMS uses a novel data
structure PSP-MS-tree to insert new items, update
current items, and delete obsolete items. By
maintaining a PSP-MS-tree, PSP-AMS efficiently finds
the frequent sequential patterns across multiple
streams. The experimental results show that PSP-AMS
significantly outperforms previous algorithms for
mining of progressive sequential patterns across
multiple streams on synthetic data as well as real
data.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Song:2019:TCA,
author = "Qingquan Song and Hancheng Ge and James Caverlee and
Xia Hu",
title = "Tensor Completion Algorithms in Big Data Analytics",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3278607",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3278607",
abstract = "Tensor completion is a problem of filling the missing
or unobserved entries of partially observed tensors.
Due to the multidimensional character of tensors in
describing complex datasets, tensor completion
algorithms and their applications have received wide
attention and achievement in areas like data mining,
computer vision, signal processing, and neuroscience.
In this survey, we provide a modern overview of recent
advances in tensor completion algorithms from the
perspective of big data analytics characterized by
diverse variety, large volume, and high velocity. We
characterize these advances from the following four
perspectives: general tensor completion algorithms,
tensor completion with auxiliary information (variety),
scalable tensor completion algorithms (volume), and
dynamic tensor completion algorithms (velocity).
Further, we identify several tensor completion
applications on real-world data-driven problems and
present some common experimental frameworks popularized
in the literature along with several available software
repositories. Our goal is to summarize these popular
methods and introduce them to researchers and
practitioners for promoting future research and
applications. We conclude with a discussion of key
challenges and promising research directions in this
community for future exploration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Moghaz:2019:TME,
author = "Dror Moghaz and Yaakov Hacohen-Kerner and Dov Gabbay",
title = "Text Mining for Evaluating Authors' Birth and Death
Years",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3281631",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3281631",
abstract = "This article presents a unique method in text and data
mining for finding the era, i.e., mining temporal data,
in which an anonymous author was living. Finding this
era can assist in the examination of a fake document or
extracting the time period in which a writer lived. The
study and the experiments concern Hebrew, and in some
parts, Aramaic and Yiddish rabbinic texts. The rabbinic
texts are undated and contain no bibliographic
sections, posing an interesting challenge. This work
proposes algorithms using key phrases and key words
that allow the temporal organization of citations
together with linguistic patterns. Based on these key
phrases, key words, and the references, we established
several types of ``Iron-clad,'' Heuristic and Greedy
rules for estimating the years of birth and death of a
writer in an interesting classification task.
Experiments were conducted on corpora, including
documents authored by 12, 24, and 36 rabbinic writers
and demonstrated promising results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2019:DRW,
author = "Hung-Hsuan Chen and Pu Chen",
title = "Differentiating Regularization Weights --- A Simple
Mechanism to Alleviate Cold Start in Recommender
Systems",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3285954",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3285954",
abstract = "Matrix factorization (MF) and its extended
methodologies have been studied extensively in the
community of recommender systems in the last decade.
Essentially, MF attempts to search for low-ranked
matrices that can (1) best approximate the known rating
scores, and (2) maintain low Frobenius norm for the
low-ranked matrices to prevent overfitting. Since the
two objectives conflict with each other, the common
practice is to assign the relative importance weights
as the hyper-parameters to these objectives. The two
low-ranked matrices returned by MF are often
interpreted as the latent factors of a user and the
latent factors of an item that would affect the rating
of the user on the item. As a result, it is typical
that, in the loss function, we assign a regularization
weight $ \lambda_p $ on the norms of the latent factors
for all users, and another regularization weight $
\lambda_q $ on the norms of the latent factors for all
the items. We argue that such a methodology probably
over-simplifies the scenario. Alternatively, we
probably should assign lower constraints to the latent
factors associated with the items or users that reveal
more information, and set higher constraints to the
others. In this article, we systematically study this
topic. We found that such a simple technique can
improve the prediction results of the MF-based
approaches based on several public datasets.
Specifically, we applied the proposed methodology on
three baseline models --- SVD, SVD++, and the NMF
models. We found that this technique improves the
prediction accuracy for all these baseline models.
Perhaps more importantly, this technique better
predicts the ratings on the long-tail items, i.e., the
items that were rated/viewed/purchased by few users.
This suggests that this approach may partially remedy
the cold-start issue. The proposed method is very
general and can be easily applied on various
recommendation models, such as Factorization Machines,
Field-aware Factorization Machines, Factorizing
Personalized Markov Chains, Prod2Vec, Behavior2Vec, and
so on. We release the code for reproducibility. We
implemented a Python package that integrates the
proposed regularization technique with the SVD, SVD++,
and the NMF model. The package can be accessed at
https://github.com/ncu-dart/rdf.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sahoo:2019:LSO,
author = "Doyen Sahoo and Steven C. H. Hoi and Bin Li",
title = "Large Scale Online Multiple Kernel Regression with
Application to Time-Series Prediction",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3299875",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Kernel-based regression represents an important family
of learning techniques for solving challenging
regression tasks with non-linear patterns. Despite
being studied extensively, most of the existing work
suffers from two major drawbacks as follows: (i) they
are often designed for solving regression tasks in a
batch learning setting, making them not only
computationally inefficient but also poorly
scalable in real-world applications where data arrives
sequentially; and (ii) they usually assume that a fixed
kernel function is given prior to the learning task,
which could result in poor performance if the chosen
kernel is inappropriate. To overcome these drawbacks,
this work presents a novel scheme of Online Multiple
Kernel Regression (OMKR), which sequentially learns the
kernel-based regressor in an online and scalable
fashion, and dynamically explores a pool of multiple
diverse kernels to avoid suffering from a single fixed
poor kernel so as to remedy the drawback of
manual/heuristic kernel selection. The OMKR problem is
more challenging than regular kernel-based regression
tasks since we have to on-the-fly determine both the
optimal kernel-based regressor for each individual
kernel and the best combination of the multiple kernel
regressors. We propose a family of OMKR algorithms for
regression and discuss their application to time series
prediction tasks including application to AR, ARMA, and
ARIMA time series. We develop novel approaches to make
OMKR scalable for large datasets, to counter the
problems arising from an unbounded number of support
vectors. We also explore the effect of kernel
combination at prediction level and at the
representation level. Finally, we conduct extensive
experiments to evaluate the empirical performance on
both real-world regression and time series prediction
tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Barton:2019:CIG,
author = {Barton, Tomas and Bruna, Tomas and Kordik, Pavel},
title = {Chameleon 2: an Improved Graph-Based Clustering
Algorithm},
journal = j-TKDD,
volume = {13},
number = {1},
pages = {10:1--10:??},
month = jan,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3299876},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Tue Jan 29 17:18:49 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
abstract = {Traditional clustering algorithms fail to produce
human-like results when confronted with data of
variable density, complex distributions, or in the
presence of noise. We propose an improved graph-based
clustering algorithm called Chameleon 2, which
overcomes several drawbacks of state-of-the-art
clustering approaches. We modified the internal cluster
quality measure and added an extra step to ensure
algorithm robustness. Our results reveal a significant
positive impact on the clustering quality measured by
Normalized Mutual Information on 32 artificial datasets
used in the clustering literature. This significant
improvement is also confirmed on real-world datasets.
The performance of clustering algorithms such as DBSCAN
is extremely parameter sensitive, and exhaustive manual
parameter tuning is necessary to obtain a meaningful
result. All hierarchical clustering methods are very
sensitive to cutoff selection, and a human expert is
often required to find the true cutoff for each
clustering result. We present an automated cutoff
selection method that enables the Chameleon 2 algorithm
to generate high-quality clustering in autonomous
mode.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {10},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Murai:2019:CDU,
author = "Fabricio Murai and Bruno Ribeiro and Don Towsley and
Pinghui Wang",
title = "Characterizing Directed and Undirected Networks via
Multidimensional Walks with Jumps",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3299877",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Estimating distributions of node characteristics
(labels) such as number of connections or citizenship
of users in a social network via edge and node sampling
is a vital part of the study of complex networks. Due
to its low cost, sampling via a random walk (RW) has
been proposed as an attractive solution to this task.
Most RW methods assume either that the network is
undirected or that walkers can traverse edges
regardless of their direction. Some RW methods have
been designed for directed networks where edges coming
into a node are not directly observable. In this work,
we propose Directed Unbiased Frontier Sampling (DUFS),
a sampling method based on a large number of
coordinated walkers, each starting from a node chosen
uniformly at random. It applies to directed networks
with invisible incoming edges because it constructs, in
real time, an undirected graph consistent with the
walkers trajectories, and its use of random jumps to
prevent walkers from being trapped. DUFS generalizes
previous RW methods and is suited for undirected
networks and to directed networks regardless of in-edge
visibility. We also propose an improved estimator of
node label distribution that combines information from
initial walker locations with subsequent RW
observations. We evaluate DUFS, compare it to other RW
methods, investigate the impact of its parameters on
estimation accuracy and provide practical guidelines
for choosing them. In estimating out-degree
distributions, DUFS yields significantly better
estimates of the head of the distribution than other
methods, while matching or exceeding estimation
accuracy of the tail. Last, we show that DUFS
outperforms uniform sampling when estimating
distributions of node labels of the top 10\% largest
degree nodes, even when sampling a node uniformly has
the same cost as RW steps.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2019:DAA,
author = "Huan Wang and Jia Wu and Wenbin Hu and Xindong Wu",
title = "Detecting and Assessing Anomalous Evolutionary
Behaviors of Nodes in Evolving Social Networks",
journal = j-TKDD,
volume = "13",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3299886",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jan 29 17:18:49 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
abstract = "Based on the performance of entire social networks,
anomaly analysis for evolving social networks generally
ignores the otherness of the evolutionary behaviors of
different nodes, such that it is difficult to precisely
identify the anomalous evolutionary behaviors of nodes
(AEBN). Assuming that a node's evolutionary behavior
that generates and removes edges normally follows
stable evolutionary mechanisms, this study focuses on
detecting and assessing AEBN, whose evolutionary
mechanisms deviate from their past mechanisms, and
proposes a link prediction detection (LPD) method and
a matrix perturbation assessment (MPA) method. LPD
describes a node's evolutionary behavior by fitting its
evolutionary mechanism, and designs indexes for edge
generation and removal to evaluate the extent to which
the evolutionary mechanism of a node's evolutionary
behavior can be fitted by a link prediction algorithm.
Furthermore, it detects AEBN by quantifying the
differences among behavior vectors that characterize
the node's evolutionary behaviors in different periods.
In addition, MPA considers AEBN as a perturbation of
the social network structure, and quantifies the effect
of AEBN on the social network structure based on matrix
perturbation analysis. Extensive experiments on eight
disparate real-world networks demonstrate that
analyzing AEBN from the perspective of evolutionary
mechanisms is important and beneficial.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{vanLeeuwen:2019:ASI,
author = {van Leeuwen, Matthijs and Chau, Polo and Vreeken,
Jilles and Shahaf, Dafna and Faloutsos, Christos},
title = {Addendum to the Special Issue on {Interactive Data
Exploration and Analytics (TKDD, Vol. 12, Iss. 1):
Introduction by the Guest Editors}},
journal = j-TKDD,
volume = {13},
number = {1},
pages = {13:1--13:??},
month = jan,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3298786},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Tue Jan 29 17:18:49 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {13},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Katib:2019:FAS,
author = {Katib, Anas and Rao, Praveen and Barnard, Kobus and
Kamhoua, Charles},
title = {Fast Approximate Score Computation on Large-Scale
Distributed Data for Learning Multinomial {Bayesian}
Networks},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {14:1--14:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3301304},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3301304},
abstract = {In this article, we focus on the problem of learning a
Bayesian network over distributed data stored in a
commodity cluster. Specifically, we address the
challenge of computing the scoring function over
distributed data in an efficient and scalable manner,
which is a fundamental task during learning. While
exact score computation can be done using the
MapReduce-style computation, our goal is to compute
approximate scores much faster with probabilistic error
bounds and in a scalable manner. We propose a novel
approach, which is designed to achieve the following:
(a) decentralized score computation using the principle
of gossiping; (b) lower resource consumption via a
probabilistic approach for maintaining scores using the
properties of a Markov chain; and (c) effective
distribution of tasks during score computation (on
large datasets) by synergistically combining well-known
hashing techniques. We conduct theoretical analysis of
our approach in terms of convergence speed of the
statistics required for score computation, and memory
and network bandwidth consumption. We also discuss how
our approach is capable of efficiently recomputing
scores when new data are available. We conducted a
comprehensive evaluation of our approach and compared
with the MapReduce-style computation using datasets of
different characteristics on a 16-node cluster. When
the MapReduce-style computation provided exact
statistics for score computation, it was nearly 10
times slower than our approach. Although it ran faster
on randomly sampled datasets than on the entire
datasets, it performed worse than our approach in terms
of accuracy. Our approach achieved high accuracy (below
6\% average relative error) in estimating the
statistics for approximate score computation on all the
tested datasets. In conclusion, it provides a feasible
tradeoff between computation time and accuracy for fast
approximate score computation on large-scale
distributed data.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {14},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Gao:2019:TEM,
author = {Gao, Xiaofeng and Cao, Zhenhao and Li, Sha and Yao,
Bin and Chen, Guihai and Tang, Shaojie},
title = {Taxonomy and Evaluation for Microblog Popularity
Prediction},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {15:1--15:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3301303},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3301303},
abstract = {As social networks become a major source of
information, predicting the outcome of information
diffusion has appeared intriguing to both researchers
and practitioners. By organizing and categorizing the
joint efforts of numerous studies on popularity
prediction, this article presents a hierarchical
taxonomy and helps to establish a systematic overview
of popularity prediction methods for microblog.
Specifically, we uncover three lines of thoughts: the
feature-based approach, time-series modelling, and the
collaborative filtering approach and analyse them,
respectively. Furthermore, we also categorize
prediction methods based on their underlying rationale:
whether they attempt to model the motivation of users
or monitor the early responses. Finally, we put these
prediction methods to test by performing experiments on
real-life data collected from popular social networks
Twitter and Weibo. We compare the methods in terms of
accuracy, efficiency, timeliness, robustness, and bias.
As far as we are concerned, there is no precedented
survey aimed at microblog popularity prediction at the
time of submission. By establishing a taxonomy and
evaluation for the first time, we hope to provide an
in-depth review of state-of-the-art prediction methods
and point out directions for further research. Our
evaluations show that time-series modelling has the
advantage of high accuracy and the ability to improve
over time. The feature-based methods using only
temporal features performs nearly as well as using all
possible features, producing average results. This
suggests that temporal features do have strong
predictive power and that power is better exploited
with time-series models. On the other hand, this
implies that we know little about the future popularity
of an item before it is posted, which may be the focus
of further research.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {15},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Yan:2019:RBT,
author = {Yan, Ruidong and Li, Yi and Wu, Weili and Li, Deying
and Wang, Yongcai},
title = {Rumor Blocking through Online Link Deletion on Social
Networks},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {16:1--16:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3301302},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3301302},
abstract = {In recent years, social networks have become important
platforms for people to disseminate information.
However, we need to take effective measures such as
blocking a set of links to control the negative rumors
spreading over the network. In this article, we propose
a Rumor Spread Minimization (RSM) problem, i.e., we
remove an edge set from network such that the rumor
spread is minimized. We first prove the objective
function of RSM problem is not submodular. Then, we
propose both submodular lower-bound and upper-bound of
the objective function. Next, we develop a heuristic
algorithm to approximate the objective function.
Furthermore, we reformulate our objective function as
the DS function (the Difference of Submodular
functions). Finally, we conduct experiments on
real-world datasets to evaluate our proposed method.
The experiment results show that the upper and lower
bounds are very close, which indicates the good quality
of them. And, the proposed method outperforms the
comparison methods.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {16},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Teinemaa:2019:OOP,
author = {Teinemaa, Irene and Dumas, Marlon and {La Rosa},
Marcello and Maggi, Fabrizio Maria},
title = {Outcome-Oriented Predictive Process Monitoring: Review
and Benchmark},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {17:1--17:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3301300},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3301300},
abstract = {Predictive business process monitoring refers to the
act of making predictions about the future state of
ongoing cases of a business process, based on their
incomplete execution traces and logs of historical
(completed) traces. Motivated by the increasingly
pervasive availability of fine-grained event data about
business process executions, the problem of predictive
process monitoring has received substantial attention
in the past years. In particular, a considerable number
of methods have been put forward to address the problem
of outcome-oriented predictive process monitoring,
which refers to classifying each ongoing case of a
process according to a given set of possible
categorical outcomes --- e.g., Will the customer
complain or not? Will an order be delivered, canceled,
or withdrawn? Unfortunately, different authors have
used different datasets, experimental settings,
evaluation measures, and baselines to assess their
proposals, resulting in poor comparability and an
unclear picture of the relative merits and
applicability of different methods. To address this
gap, this article presents a systematic review and
taxonomy of outcome-oriented predictive process
monitoring methods, and a comparative experimental
evaluation of eleven representative methods using a
benchmark covering 24 predictive process monitoring
tasks based on nine real-life event logs.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {17},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Ma:2019:PBD,
author = {Ma, Liang and Srivatsa, Mudhakar and Cansever, Derya
and Yan, Xifeng and Kase, Sue and Vanni, Michelle},
title = {Performance Bounds of Decentralized Search in Expert
Networks for Query Answering},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {18:1--18:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3300230},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3300230},
abstract = {Expert networks are formed by a group of
expert-professionals with different specialties to
collaboratively resolve specific queries posted to the
network. In such networks, when a query reaches an
expert who does not have sufficient expertise, this
query needs to be routed to other experts for further
processing until it is completely solved; therefore,
query answering efficiency is sensitive to the
underlying query routing mechanism being used. Among
all possible query routing mechanisms, decentralized
search, operating purely on each expert's local
information without any knowledge of network global
structure, represents the most basic and scalable
routing mechanism, which is applicable to any network
scenarios even in dynamic networks. However, there is
still a lack of fundamental understanding of the
efficiency of decentralized search in expert networks.
In this regard, we investigate decentralized search by
quantifying its performance under a variety of network
settings. Our key findings reveal the existence of
network conditions, under which decentralized search
can achieve significantly short query routing paths
(i.e., between $ O(\log n) $ and $ O(\log^2 n) $ hops,
$n$: total number of experts in the network). Based on
such theoretical foundation, we further study how the
unique properties of decentralized search in expert
networks are related to the anecdotal small-world
phenomenon. In addition, we demonstrate that
decentralized search is robust against estimation
errors introduced by misinterpreting the required
expertise levels. The developed performance bounds,
confirmed by real datasets, are able to assist in
predicting network performance and designing complex
expert networks.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {18},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Jha:2019:DMD,
author = {Jha, Kishlay and Xun, Guangxu and Gopalakrishnan,
Vishrawas and Zhang, Aidong},
title = {{DWE-Med}: Dynamic Word Embeddings for Medical
Domain},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {19:1--19:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3310254},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3310254},
abstract = {Recent advances in unsupervised language processing
methods have created an opportunity to exploit massive
text corpora for developing high-quality vector space
representation (also known as word embeddings) of
words. Towards this direction, practitioners have
developed and applied several data driven embedding
models with quite good rate of success. However, a
drawback of these models lies in their premise of
static context; wherein, the meaning of a word is
assumed to remain the same over the period of time.
This is limiting because it is known that the semantic
meaning of a concept evolves over time. While such
semantic drifts are routinely observed in almost all
the domains; their effect is acute in domain such as
biomedicine, where the semantic meaning of a concept
changes relatively fast. To address this, in this
study, we aim to learn temporally aware vector
representation of medical concepts from the timestamped
text data, and in doing so provide a systematic
approach to formalize the problem. More specifically, a
dynamic word embedding based model that jointly learns
the temporal characteristics of medical concepts and
performs across time-alignment is proposed. Apart from
capturing the evolutionary characteristics in an
optimal manner, the model also factors in the implicit
medical properties useful for a variety of bio-medical
applications. Empirical studies conducted on two
important bio-medical use cases validates the
effectiveness of the proposed approach and suggests
that the model not only learns quality embeddings but
also facilitates intuitive trajectory visualizations.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {19},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Cadena:2019:NOP,
author = {Cadena, Jose and Chen, Feng and Vullikanti, Anil},
title = {Near-Optimal and Practical Algorithms for Graph Scan
Statistics with Connectivity Constraints},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {20:1--20:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3309712},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3309712},
abstract = {One fundamental task in network analysis is detecting
``hotspots'' or ``anomalies'' in the network; that is,
detecting subgraphs where there is significantly more
activity than one would expect given historical data or
some baseline process. Scan statistics is one popular
approach used for anomalous subgraph detection. This
methodology involves maximizing a score function over
all connected subgraphs, which is a challenging
computational problem. A number of heuristics have been
proposed for these problems, but they do not provide
any quality guarantees. Here, we propose a framework
for designing algorithms for optimizing a large class
of scan statistics for networks, subject to
connectivity constraints. Our algorithms run in time
that scales linearly on the size of the graph and
depends on a parameter we call the ``effective solution
size,'' while providing rigorous approximation
guarantees. In contrast, most prior methods have
super-linear running times in terms of graph size.
Extensive empirical evidence demonstrates the
effectiveness and efficiency of our proposed algorithms
in comparison with state-of-the-art methods. Our
approach improves on the performance relative to all
prior methods, giving up to over 25\% increase in the
score. Further, our algorithms scale to networks with
up to a million nodes, which is 1--2 orders of
magnitude larger than all prior applications.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {20},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Jiang:2019:PFS,
author = "Bingbing Jiang and Chang Li and Maarten de Rijke and
Xin Yao and Huanhuan Chen",
title = "Probabilistic Feature Selection and Classification
Vector Machine",
journal = j-TKDD,
volume = "13",
number = "2",
pages = "21:1--21:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3309541",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:01 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3309541",
abstract = "Sparse Bayesian learning is a state-of-the-art
supervised learning algorithm that can choose a subset
of relevant samples from the input data and make
reliable probabilistic predictions. However, in the
presence of high-dimensional data with irrelevant
features, traditional sparse Bayesian classifiers
suffer from performance degradation and low efficiency
due to the incapability of eliminating irrelevant
features. To tackle this problem, we propose a novel
sparse Bayesian embedded feature selection algorithm
that adopts truncated Gaussian distributions as both
sample and feature priors. The proposed algorithm,
called probabilistic feature selection and
classification vector machine (PFCVM$_{LP}$) is able to
simultaneously select relevant features and samples for
classification tasks. In order to derive the analytical
solutions, Laplace approximation is applied to compute
approximate posteriors and marginal likelihoods.
Finally, parameters and hyperparameters are optimized
by the type-II maximum likelihood method. Experiments
on three datasets validate the performance of
PFCVM$_{LP}$ along two dimensions: classification
performance and effectiveness for feature selection.
Finally, we analyze the generalization performance and
derive a generalization error bound for PFCVM$_{LP}$.
By tightening the bound, the importance of feature
selection is demonstrated.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "21",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2019:FST,
author = {Wang, Zheng and Ye, Xiaojun and Wang, Chaokun and Yu,
Philip S.},
title = {Feature Selection via Transferring Knowledge Across
Different Classes},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {22:1--22:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3314202},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3314202},
abstract = {The problem of feature selection has attracted
considerable research interest in recent years.
Supervised information is capable of significantly
improving the quality of selected features. However,
existing supervised feature selection methods all
require that classes in the labeled data (source
domain) and unlabeled data (target domain) to be
identical, which may be too restrictive in many cases.
In this article, we consider a more challenging
cross-class setting where the classes in these two
domains are related but different, which has rarely
been studied before. We propose a cross-class knowledge
transfer feature selection framework which transfers
the cross-class knowledge from the source domain to
guide target domain feature selection. Specifically,
high-level descriptions, i.e., attributes, are used as
the bridge for knowledge transfer. To further improve
the quality of the selected features, our framework
jointly considers the tasks of cross-class knowledge
transfer and feature selection. Experimental results on
four benchmark datasets demonstrate the superiority of
the proposed method.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {22},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Hong:2019:VGM,
author = {Hong, Junyuan and Li, Yang and Chen, Huanhuan},
title = {Variant {Grassmann} Manifolds: a Representation
Augmentation Method for Action Recognition},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {23:1--23:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3314203},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3314203},
abstract = {In classification tasks, classifiers trained with
finite examples might generalize poorly to new data
with unknown variance. For this issue, data
augmentation is a successful solution where numerous
artificial examples are added to training sets. In this
article, we focus on the data augmentation for
improving the accuracy of action recognition, where
action videos are modeled by linear dynamical systems
and approximately represented as linear subspaces.
These subspace representations lie in a non-Euclidean
space, named Grassmann manifold, containing points as
orthonormal matrixes. It is our concern that poor
generalization may result from the variance of
manifolds when data come from different sources or
classes. Thus, we introduce infinitely many variant
Grassmann manifolds (VGM) subject to a known
distribution, then represent each action video as
different Grassmann points leading to augmented
representations. Furthermore, a prior based on the
stability of subspace bases is introduced, so the
manifold distribution can be adaptively determined,
balancing discrimination and representation.
Experimental results of multi-class and multi-source
classification show that VGM softmax classifiers
achieve lower test error rates compared to methods with
a single manifold.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {23},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@Article{Guo:2019:LLS,
author = {Guo, Yumeng and Chung, Fulai and Li, Guozheng and
Wang, Jiancong and Gee, James C.},
title = {Leveraging Label-Specific Discriminant Mapping
Features for Multi-Label Learning},
journal = j-TKDD,
volume = {13},
number = {2},
pages = {24:1--24:??},
month = jun,
year = {2019},
CODEN = {????},
DOI = {https://doi.org/10.1145/3319911},
ISSN = {1556-4681 (print), 1556-472X (electronic)},
ISSN-L = {1556-4681},
bibdate = {Wed Dec 18 14:31:01 MST 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
URL = {https://dl.acm.org/ft_gateway.cfm?id=3319911},
abstract = {As an important machine learning task, multi-label
learning deals with the problem where each sample
instance (feature vector) is associated with multiple
labels simultaneously. Most existing approaches focus
on manipulating the label space, such as exploiting
correlations between labels and reducing label space
dimension, with identical feature space in the process
of classification. One potential drawback of this
traditional strategy is that each label might have its
own specific characteristics and using identical
features for all label cannot lead to optimized
performance. In this article, we propose an effective
algorithm named LSDM, i.e., leveraging label-specific
discriminant mapping features for multi-label learning,
to overcome the drawback. LSDM sets diverse ratio
parameter values to conduct cluster analysis on the
positive and negative instances of identical label. It
reconstructs label-specific feature space which
includes distance information and spatial topology
information. Our experimental results show that
combining these two parts of information in the new
feature representation can better exploit the
clustering results in the learning process. Due to the
problem of diverse combinations for identical label, we
employ simplified linear discriminant analysis to
efficiently excavate optimal one for each label and
perform classification by querying the corresponding
results. Comparison with the state-of-the-art
algorithms on a total of 20 benchmark datasets clearly
manifests the competitiveness of LSDM.},
acknowledgement = ack-nhfb,
ajournal = {ACM Trans. Knowl. Discov. Data},
articleno = {24},
fjournal = {ACM Transactions on Knowledge Discovery from Data
(TKDD)},
journal-URL = {https://dl.acm.org/loi/tkdd},
}
@article{Gan:2019:SPS,
  author = {Wensheng Gan and Jerry Chun-Wei Lin and Philippe
    Fournier-Viger and Han-Chieh Chao and Philip S. Yu},
  title = {A Survey of Parallel Sequential Pattern Mining},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {25:1--25:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3314107},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3314107},
  abstract = {With the growing popularity of shared resources, large
    volumes of complex data of different types are
    collected automatically. Traditional data mining
    algorithms generally have problems and challenges
    including huge memory cost, low processing speed, and
    inadequate hard disk space. As a fundamental task of
    data mining, sequential pattern mining (SPM) is used in
    a wide variety of real-life applications. However, it
    is more complex and challenging than other pattern
    mining tasks, i.e., frequent itemset mining and
    association rule mining, and also suffers from the
    above challenges when handling the large-scale data. To
    solve these problems, mining sequential patterns in a
    parallel or distributed computing environment has
    emerged as an important issue with many applications.
    In this article, an in-depth survey of the current
    status of parallel SPM (PSPM) is investigated and
    provided, including detailed categorization of
    traditional serial SPM approaches, and state-of-the art
    PSPM. We review the related work of PSPM in details
    including partition-based algorithms for PSPM,
    apriori-based PSPM, pattern-growth-based PSPM, and
    hybrid algorithms for PSPM, and provide deep
    description (i.e., characteristics, advantages,
    disadvantages, and summarization) of these parallel
    approaches of PSPM. Some advanced topics for PSPM,
    including parallel quantitative/weighted/utility SPM,
    PSPM from uncertain data and stream data, hardware
    acceleration for PSPM, are further reviewed in details.
    Besides, we review and provide some well-known
    open-source software of PSPM. Finally, we summarize
    some challenges and opportunities of PSPM in the big
    data era.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {25},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Mahmoudi:2019:RBO,
  author = {Amin Mahmoudi and Mohd Ridzwan Yaakub and Azuraliza
    Abu Bakar},
  title = {The Relationship between Online Social Network Ties
    and User Attributes},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {26:1--26:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3314204},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3314204},
  abstract = {The distance between users has an effect on the
    formation of social network ties, but it is not the
    only or even the main factor. Knowing all the features
    that influence such ties is very important for many
    related domains such as location-based recommender
    systems and community and event detection systems for
    online social networks (OSNs). In recent years,
    researchers have analyzed the role of user geo-location
    in OSNs. Researchers have also attempted to determine
    the probability of friendships being established based
    on distance, where friendship is not only a function of
    distance. However, some important features of OSNs
    remain unknown. In order to comprehensively understand
    the OSN phenomenon, we also need to analyze users'
    attributes. Basically, an OSN functions according to
    four main user properties: user geo-location, user
    weight, number of user interactions, and user lifespan.
    The research presented here sought to determine whether
    the user mobility pattern can be used to predict users'
    interaction behavior. It also investigated whether, in
    addition to distance, the number of friends (known as
    user weight) interferes in social network tie
    formation. To this end, we analyzed the above-stated
    features in three large-scale OSNs. We found that
    regardless of a high degree freedom in user mobility,
    the fraction of the number of outside activities over
    the inside activity is a significant fraction that
    helps us to address the user interaction behavior. To
    the best of our knowledge, research has not been
    conducted elsewhere on this issue. We also present a
    high-resolution formula in order to improve the
    friendship probability function.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {26},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Zhou:2019:MTC,
  author = {Yao Zhou and Lei Ying and Jingrui He},
  title = {Multi-task Crowdsourcing via an Optimization
    Framework},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {27:1--27:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3310227},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3310227},
  abstract = {The unprecedented amounts of data have catalyzed the
    trend of combining human insights with machine learning
    techniques, which facilitate the use of crowdsourcing
    to enlist label information both effectively and
    efficiently. One crucial challenge in crowdsourcing is
    the diverse worker quality, which determines the
    accuracy of the label information provided by such
    workers. Motivated by the observations that same set of
    tasks are typically labeled by the same set of workers,
    we studied their behaviors across multiple related
    tasks and proposed an optimization framework for
    learning from task and worker dual heterogeneity. The
    proposed method uses a weight tensor to represent the
    workers' behaviors across multiple tasks, and seeks to
    find the optimal solution of the tensor by exploiting
    its structured information. Then, we propose an
    iterative algorithm to solve the optimization problem
    and analyze its computational complexity. To infer the
    true label of an example, we construct a worker
    ensemble based on the estimated tensor, whose decisions
    will be weighted using a set of entropy weight. We also
    prove that the gradient of the most time-consuming
    updating block is separable with respect to the
    workers, which leads to a randomized algorithm with
    faster speed. Moreover, we extend the learning
    framework to accommodate to the multi-class setting.
    Finally, we test the performance of our framework on
    several datasets, and demonstrate its superiority over
    state-of-the-art techniques.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {27},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Zhang:2019:RRH,
  author = {Xuchao Zhang and Shuo Lei and Liang Zhao and Arnold P.
    Boedihardjo and Chang-Tien Lu},
  title = {Robust Regression via Heuristic Corruption
    Thresholding and Its Adaptive Estimation Variation},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {28:1--28:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3314105},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3314105},
  abstract = {The presence of data noise and corruptions has
    recently invoked increasing attention on robust
    least-squares regression (RLSR), which addresses this
    fundamental problem that learns reliable regression
    coefficients when response variables can be arbitrarily
    corrupted. Until now, the following important
    challenges could not be handled concurrently: (1)
    rigorous recovery guarantee of regression coefficients,
    (2) difficulty in estimating the corruption ratio
    parameter, and (3) scaling to massive datasets. This
    article proposes a novel Robust regression algorithm
    via Heuristic Corruption Thresholding (RHCT) that
    concurrently addresses all the above challenges.
    Specifically, the algorithm alternately optimizes the
    regression coefficients and estimates the optimal
    uncorrupted set via heuristic thresholding without a
    pre-defined corruption ratio parameter until its
    convergence. Moreover, to improve the efficiency of
    corruption estimation in large-scale data, a Robust
    regression algorithm via Adaptive Corruption
    Thresholding (RACT) is proposed to determine the size
    of the uncorrupted set in a novel adaptive search
    method without iterating data samples exhaustively. In
    addition, we prove that our algorithms benefit from
    strong guarantees analogous to those of
    state-of-the-art methods in terms of convergence rates
    and recovery guarantees. Extensive experiments
    demonstrate that the effectiveness of our new methods
    is superior to that of existing methods in the recovery
    of both regression coefficients and uncorrupted sets,
    with very competitive efficiency.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {28},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Wang:2019:IDP,
  author = {Zhitao Wang and Chengyao Chen and Wenjie Li},
  title = {Information Diffusion Prediction with Network
    Regularized Role-based User Representation Learning},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {29:1--29:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3314106},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3314106},
  abstract = {In this article, we aim at developing a user
    representation learning model to solve the information
    diffusion prediction problem in social media. The main
    idea is to project the diffusion users into a
    continuous latent space as the role-based (sender and
    receiver) representations, which capture unique
    diffusion characteristics of users. The model learns
    the role-based representations based on a cascade
    modeling objective that aims at maximizing the
    likelihood of observed cascades, and employs the matrix
    factorization objective of reconstructing structural
    proximities as a regularization on representations. By
    jointly embedding the information of cascades and
    network, the learned representations are robust on
    different diffusion data. We evaluate the proposed
    model on three real-world datasets. The experimental
    results demonstrate the better performance of the
    proposed model than state-of-the-art diffusion
    embedding and network embedding models and other
    popular graph-based methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {29},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Ju:2019:TRB,
  author = {Fujiao Ju and Yanfeng Sun and Junbin Gao and Michael
    Antolovich and Junliang Dong and Baocai Yin},
  title = {Tensorizing Restricted {Boltzmann} Machine},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {30:1--30:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3321517},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3321517},
  abstract = {Restricted Boltzmann machine (RBM) is a famous model
    for feature extraction and can be used as an
    initializer for neural networks. When applying the
    classic RBM to multidimensional data such as 2D/3D
    tensors, one needs to vectorize such as high-order
    data. Vectorizing will result in dimensional disaster
    and valuable spatial information loss. As RBM is a
    model with fully connected layers, it requires a large
    amount of memory. Therefore, it is difficult to use RBM
    with high-order data on low-end devices. In this
    article, to utilize classic RBM on tensorial data
    directly, we propose a new tensorial RBM model
    parameterized by the tensor train format (TTRBM). In
    this model, both visible and hidden variables are in
    tensorial form, which are connected by a parameter
    matrix in tensor train format. The biggest advantage of
    the proposed model is that TTRBM can obtain comparable
    performance compared with the classic RBM with much
    fewer model parameters and faster training process. To
    demonstrate the advantages of TTRBM, we conduct three
    real-world applications, face reconstruction,
    handwritten digit recognition, and image
    super-resolution in the experiments.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {30},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Liu:2019:LKI,
  author = {Chenyang Liu and Jian Cao and Shanshan Feng},
  title = {Leveraging Kernel-Incorporated Matrix Factorization
    for App Recommendation},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {31:1--31:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3320482},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3320482},
  abstract = {The ever-increasing number of smartphone applications
    (apps) available on different app markets poses a
    challenge for personalized app recommendation.
    Conventional collaborative filtering-based
    recommendation methods suffer from sparse and binary
    user-app implicit feedback, which results in poor
    performance in discriminating user-app preferences. In
    this article, we first propose two kernel incorporated
    probabilistic matrix factorization models, which
    introduce app-categorical information to constrain the
    user and app latent features to be similar to their
    neighbors in the latent space. The two models are
    solved by Stochastic Gradient Descent with a
    user-oriented negative sampling scheme. To further
    improve the recommendation performance, we construct
    pseudo user-app ratings based on user-app usage
    information, and propose a novel kernelized
    non-negative matrix factorization by incorporating
    non-negative constraints on latent factors to predict
    user-app preferences. This model also leverages
    user--user and app--app similarities with regard to
    app-categorical information to mine the latent
    geometric structure in the pseudo-rating space.
    Adopting the Karush--Kuhn--Tucker conditions, a
    Multiplicative Updating Rules based optimization is
    proposed for model learning, and the convergence is
    proved by introducing an auxiliary function. The
    experimental results on a real user-app installation
    usage dataset show the comparable performance of our
    models with the state-of-the-art baselines in terms of
    two ranking-oriented evaluation metrics.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {31},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Dehghan:2019:TDE,
  author = {Mahdi Dehghan and Ahmad Ali Abin},
  title = {Translations Diversification for Expert Finding: a
    Novel Clustering-based Approach},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {32:1--32:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3320489},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3320489},
  abstract = {Expert finding is the task of retrieving and ranking
    knowledgeable people in the subject of user's query. It
    is a well-studied problem that has attracted the
    attention of many researchers. The most important
    challenge in expert finding is to determine the
    similarity between query words and documents authored
    by candidate experts. One of the most important
    challenges in Information Retrieval (IR) community is
    the issue of vocabulary gap between queries and
    documents. In this study, a translation model based on
    words clustering in two query and co-occurrence spaces
    is proposed to overcome this problem. First, the words
    that are semantically close, are clustered in a query
    space and then each cluster in this space are clustered
    again in a co-occurrence space. Representatives of each
    cluster in the co-occurrence space are considered as a
    diverse subset of the parent cluster. By this method,
    the query translations are expected to be diversified
    in the query space. Next, a probabilistic model, that
    is based on the belonging degree of word to cluster and
    similarity of cluster to query in the query space, is
    used to consider the problem of vocabulary gap.
    Finally, the corresponding translations to each query
    are used in conjunction with a combination model for
    expert finding. Experiments on Stack Overflow dataset
    show the effectiveness of the proposed method for
    expert finding.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {32},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Iqbal:2019:BPE,
  author = {Mohsin Iqbal and Asim Karim and Faisal Kamiran},
  title = {Balancing Prediction Errors for Robust Sentiment
    Classification},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {33:1--33:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3328795},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3328795},
  abstract = {Sentiment classification is a popular text mining task
    in which textual content (e.g., a message) is assigned
    a polarity label (typically positive or negative)
    reflecting the sentiment expressed in it. Sentiment
    classification is used widely in applications like
    customer feedback analysis where robustness and
    correctness of results are critical. In this article,
    we highlight that prediction accuracy alone is not
    sufficient for assessing the performance of a sentiment
    classifier; it is also important that the classifier is
    not biased toward positive or negative polarity, thus
    distorting the distribution of positive and negative
    messages in the predictions. We propose a measure,
    called Polarity Bias Rate, for quantifying this bias in
    a sentiment classifier. Second, we present two methods
    for removing this bias in the predictions of
    unsupervised and supervised sentiment classifiers. Our
    first method, called Bias-Aware Thresholding (BAT),
    shifts the decision boundary to control the bias in the
    predictions. Motivated from cost-sensitive learning,
    BAT is easily applicable to both lexicon-based
    unsupervised and supervised classifiers. Our second
    method, called Balanced Logistic Regression (BLR)
    introduces a bias-remover constraint into the standard
    logistic regression model. BLR is an automatic
    bias-free supervised sentiment classifier. We evaluate
    our methods extensively on seven real-world datasets.
    The experiments involve two lexicon-based and two
    supervised sentiment classifiers and include evaluation
    on multiple train-test data sizes. The results show
    that bias is controlled effectively in predictions.
    Furthermore, prediction accuracy is also increased in
    many cases, thus enhancing the robustness of sentiment
    classification.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {33},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Zhang:2019:ICS,
  author = {Mingyue Zhang and Xuan Wei and Xunhua Guo and Guoqing
    Chen and Qiang Wei},
  title = {Identifying Complements and Substitutes of Products: a
    Neural Network Framework Based on Product Embedding},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {34:1--34:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3320277},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3320277},
  abstract = {Complements and substitutes are two typical product
    relationships that deserve consideration in online
    product recommendation. One of the key objectives of
    recommender systems is to promote cross-selling, which
    heavily relies on recommending the appropriate type of
    products in specific scenarios. Research on consumer
    behavior has shown that consumers usually prefer
    substitutes in the browsing stage whereas complements
    in the purchasing stage. Thus, it is of great
    importance to identify the complementary and
    substitutable relationships between products. In this
    article, we design a neural network based framework
    that integrates the textual content and non-textual
    information of online reviews to mine product
    relationships. For the textual content, we utilize
    methods such as LDA topic modeling to represent
    products in a succinct form called ``embedding.'' To
    capture the semantics of complementary and
    substitutable relationships, we design a modeling
    process that transfers the product embeddings into
    semantic features and incorporates additional
    non-textual factors of product reviews. Extensive
    experiments are conducted to verify the effectiveness
    of the proposed product relationship mining model. The
    advantages and robustness of our model are discussed
    from various perspectives.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {34},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Huang:2019:RNC,
  author = {Yourong Huang and Zhu Xiao and Xiaoyou Yu and Dong
    Wang and Vincent Havyarimana and Jing Bai},
  title = {Road Network Construction with Complex Intersections
    Based on Sparsely Sampled Private Car Trajectory Data},
  journal = j-TKDD,
  volume = {13},
  number = {3},
  pages = {35:1--35:??},
  month = jul,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3326060},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3326060},
  abstract = {A road network is a critical aspect of both urban
    planning and route recommendation. This article
    proposes an efficient approach to build a fine-grained
    road network based on sparsely sampled private car
    trajectory data under complex urban environment. In
    order to resolve difficulties introduced by low
    sampling rate trajectory data, we concentrate sample
    points around intersections by utilizing the turning
    characteristics from the large-scale trajectory data to
    ensure the accuracy of the detection of intersections
    and road segments. In front of complex road networks
    including many complex intersections, such as the
    overpasses and underpasses, we first layer
    intersections into major and minor one, and then
    propose a simplified representation of intersections
    and corresponding computable model based on the
    features of roads, which can significantly improve the
    accuracy of detected road networks, especially for the
    complex intersections. In order to construct
    fine-grained road networks, we distinguish various
    types of intersections using direction information and
    detected turning limit. To the best of our knowledge,
    our road network building method is the first time to
    give fine-grained road networks based on low-sampling
    rate private car trajectory data, especially able to
    infer the location of complex intersections and its
    connections to other intersections. Last but not the
    least, we propose an effective parameter selection
    process for the Density-Based Spatial Clustering of
    Applications with Noise based clustering algorithm,
    which is used to implement the reliable intersection
    detection. Extensive evaluations are conducted based on
    a real-world trajectory dataset from 1,345 private cars
    in Futian district, Shenzhen city of China. The results
    demonstrate the effectiveness of the proposed method.
    The constructed road network matches close to the one
    from a public editing map OpenStreetMap, especially the
    location of the road intersections and road segments,
    which achieves 92.2\% intersections within 20m and
    91.6\% road segments within 8m.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {35},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Dornaika:2019:ATP,
  author = {Fadi Dornaika},
  title = {Active Two Phase Collaborative Representation
    Classifier},
  journal = j-TKDD,
  volume = {13},
  number = {4},
  pages = {36:1--36:??},
  month = aug,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3326919},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3326919},
  abstract = {The Sparse Representation Classifier, the
    Collaborative Representation Classifier (CRC), and the
    Two Phase Test Sample Sparse Representation (TPTSSR)
    classifier were introduced in recent times. All these
    frameworks are supervised and passive in the sense that
    they cannot benefit from unlabeled data samples. In
    this paper, inspired by active learning paradigms, we
    introduce an active CRC that can be used by these
    frameworks. More precisely, we are interested in the
    TPTSSR framework due to its good performance and its
    reasonable computational cost. Our proposed Active Two
    Phase Collaborative Representation Classifier (ATPCRC)
    starts by predicting the label of the available
    unlabeled samples. At testing stage, two coding
    processes are carried out separately on the set of
    originally labeled samples and the whole set (original
    and predicted label). The two types of class-wise
    reconstruction errors are blended in order to decide
    the class of any test image. Experiments conducted on
    four public image datasets show that the proposed
    ATPCRC can outperform the classic TPTSSR as well as
    many state-of-the-art methods that exploit label and
    unlabeled data samples.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {36},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@Article{Yang:2019:TSV,
author = "Wenmian Yang and Kun Wang and Na Ruan and Wenyuan Gao
and Weijia Jia and Wei Zhao and Nan Liu and Yunyong
Zhang",
title = "Time-Sync Video Tag Extraction Using Semantic
Association Graph",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "37:1--37:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3332932",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3332932",
abstract = "Time-sync comments (TSCs) reveal a new way of
extracting the online video tags. However, such TSCs
have lots of noises due to users' diverse comments,
introducing great challenges for accurate and fast
video tag extractions. In this article, we propose an
unsupervised video tag extraction algorithm named
Semantic Weight-Inverse Document Frequency (SW-IDF).
Specifically, we first generate corresponding semantic
association graph (SAG) using semantic similarities and
timestamps of the TSCs. Second, we propose two graph
cluster algorithms, i.e., dialogue-based algorithm and
topic center-based algorithm, to deal with the videos
with different density of comments. Third, we design a
graph iteration algorithm to assign the weight to each
comment based on the degrees of the clustered
subgraphs, which can differentiate the meaningful
comments from the noises. Finally, we gain the weight
of each word by combining Semantic Weight (SW) and
Inverse Document Frequency (IDF). In this way, the
video tags are extracted automatically in an
unsupervised way. Extensive experiments have shown that
SW-IDF (dialogue-based algorithm) achieves 0.4210
F1-score and 0.4932 MAP (Mean Average Precision) in
high-density comments, 0.4267 F1-score and 0.3623 MAP
in low-density comments; while SW-IDF (topic
center-based algorithm) achieves 0.4444 F1-score and
0.5122 MAP in high-density comments, 0.4207 F1-score
and 0.3522 MAP in low-density comments. It has a better
performance than the state-of-the-art unsupervised
algorithms in both F1-score and MAP.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "37",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Amelkin:2019:DMA,
author = "Victor Amelkin and Petko Bogdanov and Ambuj K. Singh",
title = "A Distance Measure for the Analysis of Polar Opinion
Dynamics in Social Networks",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "38:1--38:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3332168",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3332168",
abstract = "Analysis of opinion dynamics in social networks plays
an important role in today's life. For predicting
users' political preference, it is particularly
important to be able to analyze the dynamics of
competing polar opinions, such as pro-Democrat vs.
pro-Republican. While observing the evolution of polar
opinions in a social network over time, can we tell
when the network evolved abnormally? Furthermore, can
we predict how the opinions of the users will change in
the future? To answer such questions, it is
insufficient to study individual user behavior, since
opinions can spread beyond users' ego-networks.
Instead, we need to consider the opinion dynamics of
all users simultaneously and capture the connection
between the individuals' behavior and the global
evolution pattern of the social network. In this work,
we introduce the Social Network Distance (SND)---a
distance measure that quantifies the likelihood of
evolution of one snapshot of a social network into
another snapshot under a chosen model of polar opinion
dynamics. SND has a rich semantics of a transportation
problem, yet, is computable in time linear in the
number of users and, as such, is applicable to
large-scale online social networks. In our experiments
with synthetic and Twitter data, we demonstrate the
utility of our distance measure for anomalous event
detection. It achieves a true positive rate of 0.83,
twice as high as that of alternatives. The same
predictions presented in precision-recall space show
that SND retains perfect precision for recall up to
0.2. Its precision then decreases while maintaining
more than 2-fold improvement over alternatives for
recall up to 0.95. When used for opinion prediction in
Twitter data, SND's accuracy is 75.6\%, which is 7.5\%
higher than that of the next best method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "38",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2019:MCP,
author = "Haoran Chen and Jinghua Li and Junbin Gao and Yanfeng
Sun and Yongli Hu and Baocai Yin",
title = "Maximally Correlated Principal Component Analysis
Based on Deep Parameterization Learning",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "39:1--39:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3332183",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3332183",
abstract = "Dimensionality reduction is widely used to deal with
high-dimensional data. As a famous dimensionality
reduction method, principal component analysis (PCA)
aiming at finding the low dimension feature of original
data has made great successes, and many improved PCA
algorithms have been proposed. However, most algorithms
based on PCA only consider the linear correlation of
data features. In this article, we propose a novel
dimensionality reduction model called maximally
correlated PCA based on deep parameterization learning
(MCPCADP), which takes nonlinear correlation into
account in the deep parameterization framework for the
purpose of dimensionality reduction. The new model
explores nonlinear correlation by maximizing Ky-Fan
norm of the covariance matrix of nonlinearly mapped
data features. A new BP algorithm for model
optimization is derived. In order to assess the
proposed method, we conduct experiments on both a
synthetic database and several real-world databases.
The experimental results demonstrate that the proposed
algorithm is comparable to several widely used
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "39",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gallardo:2019:IVE,
author = "Laura Fern{\'a}ndez Gallardo and Ramon
Sanchez-Iborra",
title = "On the Impact of Voice Encoding and Transmission on
the Predictions of Speaker Warmth and Attractiveness",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "40:1--40:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3332146",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3332146",
abstract = "Modern human-computer interaction systems may not only
be based on interpreting natural language but also on
detecting speaker interpersonal characteristics in
order to determine dialog strategies. This may be of
high interest in different fields such as telephone
marketing or automatic voice-based interactive
services. However, when such systems encounter signals
transmitted over a communication network instead of
clean speech, e.g., in call centers, the speaker
characterization accuracy might be impaired by the
degradations caused in the speech signal by the
encoding and communication processes. This article
addresses a binary classification of high versus low
warm--attractive speakers over different channel and
encoding conditions. The ground truth is derived from
ratings given to clean speech extracted from an
extensive subjective test. Our results show that, under
the considered conditions, the AMR-WB+ codec permits
good levels of classification accuracy, comparable to
the classification with clean, non-degraded speech.
This is especially notable for the case of a Random
Forest-based classifier, which presents the best
performance among the set of evaluated algorithms. The
impact of different packet loss rates has been
examined, whereas jitter effects have been found to be
negligible.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "40",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Comito:2019:BED,
author = "Carmela Comito and Agostino Forestiero and Clara
Pizzuti",
title = "Bursty Event Detection in {Twitter} Streams",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "41:1--41:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3332185",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3332185",
abstract = "Social media, in recent years, have become an
invaluable source of information for both public and
private organizations to enhance the comprehension of
people interests and the onset of new events. Twitter,
especially, allows a fast spread of news and events
happening real time that can contribute to situation
awareness during emergency situations, but also to
understand trending topics of a period. The article
proposes an online algorithm that incrementally groups
tweet streams into clusters. The approach summarizes
the examined tweets into the cluster centroid by
maintaining a number of textual and temporal features
that allow the method to effectively discover groups of
interest on particular themes. Experiments on messages
posted by users addressing different issues, and a
comparison with state-of-the-art approaches show that
the method is capable to detect discussions regarding
topics of interest, but also to distinguish bursty
events revealed by a sudden spreading of attention on
messages published by users.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "41",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Qiang:2019:HLT,
author = "Jipeng Qiang and Ping Chen and Wei Ding and Tong Wang
and Fei Xie and Xindong Wu",
title = "Heterogeneous-Length Text Topic Modeling for
Reader-Aware Multi-Document Summarization",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "42:1--42:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3333030",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3333030",
abstract = "More and more user comments like Tweets are available,
which often contain user concerns. In order to meet the
demands of users, a good summary generating from
multiple documents should consider reader interests as
reflected in reader comments. In this article, we focus
on how to generate a summary from multi-document
documents by considering reader comments, named as
reader-aware multi-document summarization (RA-MDS). We
present an innovative topic-based method for RA-MDS,
which exploits latent topics to obtain the most salient
and lessen redundancy summary from multiple documents.
Since finding latent topics for RA-MDS is a crucial
step, we also present a Heterogeneous-length Text Topic
Modeling (HTTM) to extract topics from the corpus that
includes both news reports and user comments, denoted
as heterogeneous-length texts. In this case, the latent
topics extracted by HTTM cover not only important aspects
of the event, but also aspects that attract reader
interests. Comparisons on summary benchmark datasets
also confirm that the proposed RA-MDS method is
effective in improving the quality of extracted
summaries. In addition, experimental results
demonstrate that the proposed topic modeling method
outperforms existing topic modeling algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "42",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2019:HDE,
author = "Qingyang Li and Zhiwen Yu and Bin Guo and Huang Xu and
Xinjiang Lu",
title = "Housing Demand Estimation Based on Express Delivery
Data",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "43:1--43:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3332522",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3332522",
abstract = "Housing demand estimation is an important topic in the
field of economic research. It is beneficial and
helpful for various applications including real estate
market regulation and urban planning, and therefore is
crucial for both real estate investors and government
administrators. Meanwhile, given the rapid development
of the express industry, abundant useful information is
embedded in express delivery records, which is helpful
for researchers in profiling urban life patterns. The
express delivery behaviors of the residents in a
residential community can reflect the housing demand to
some extent. Although housing demand has been analyzed
in previous studies, its estimation has not been very
good, and the subject remains under explored. To this
end, in this article, we propose a systematic housing
demand estimation method based on express delivery
data. First, the express delivery records are
aggregated on the community scale with the use of
clustering methods, and the missing values in the
records are completed. Then, various features are
extracted from a less sparse dataset considering both
the probability of residential mobility and the
attractiveness of residential communities. In addition,
given that the correlations between different districts
can influence the performances of the inference model,
the commonalities and differences of different
districts are considered. After the features and
correlations between different districts have been
obtained, the housing demand is estimated by using a
multi-task learning method based on neural networks.
The experimental results for real-world data show that
the proposed model is effective at estimating the
housing demand at the residential community level.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "43",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sajadmanesh:2019:CTR,
author = "Sina Sajadmanesh and Sogol Bazargani and Jiawei Zhang
and Hamid R. Rabiee",
title = "Continuous-Time Relationship Prediction in Dynamic
Heterogeneous Information Networks",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "44:1--44:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3333028",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3333028",
abstract = "Online social networks, World Wide Web, media, and
technological networks, and other types of so-called
information networks are ubiquitous nowadays. These
information networks are inherently heterogeneous and
dynamic. They are heterogeneous as they consist of
multi-typed objects and relations, and they are dynamic
as they are constantly evolving over time. One of the
challenging issues in such heterogeneous and dynamic
environments is to forecast those relationships in the
network that will appear in the future. In this
article, we try to solve the problem of continuous-time
relationship prediction in dynamic and heterogeneous
information networks. This implies predicting the time
it takes for a relationship to appear in the future,
given its features that have been extracted by
considering both heterogeneity and temporal dynamics of
the underlying network. To this end, we first introduce
a feature extraction framework that combines the power
of meta-path-based modeling and recurrent neural
networks to effectively extract features suitable for
relationship prediction regarding heterogeneity and
dynamicity of the networks. Next, we propose a
supervised non-parametric approach, called
Non-Parametric Generalized Linear Model (Np-Glm), which
infers the hidden underlying probability distribution
of the relationship building time given its features.
We then present a learning algorithm to train Np-Glm
and an inference method to answer time-related queries.
Extensive experiments conducted on synthetic data and
three real-world datasets, namely Delicious, MovieLens,
and DBLP, demonstrate the effectiveness of Np-Glm in
solving continuous-time relationship prediction problem
vis-{\`a}-vis competitive baselines.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "44",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ben-Gal:2019:CUT,
author = "Irad Ben-Gal and Shahar Weinstock and Gonen Singer and
Nicholas Bambos",
title = "Clustering Users by Their Mobility Behavioral
Patterns",
journal = j-TKDD,
volume = "13",
number = "4",
pages = "45:1--45:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3322126",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3322126",
abstract = "The immense stream of data from mobile devices during
recent years enables one to learn more about human
behavior and provide mobile phone users with
personalized services. In this work, we identify
clusters of users who share similar mobility behavioral
patterns. We analyze trajectories of semantic locations
to find users who have similar mobility ``lifestyle,''
even when they live in different areas. For this task,
we propose a new grouping scheme that is called
Lifestyle-Based Clustering (LBC). We represent the
mobility movement of each user by a Markov model and
calculate the Jensen-Shannon distances among pairs of
users. The pairwise distances are represented by a
similarity matrix, which is used for the clustering. To
validate the unsupervised clustering task, we develop
an entropy-based clustering measure, namely, an index
that measures the homogeneity of mobility patterns
within clusters of users. The analysis is validated on
a real-world dataset that contains location-movements
of 50,000 cellular phone users that were analyzed over
a two-month period.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "45",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2019:FGA,
author = "Yanan Xu and Yanmin Zhu and Yanyan Shen and Jiadi Yu",
title = "Fine-Grained Air Quality Inference with Remote Sensing
Data and Ubiquitous Urban Data",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "46:1--46:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3340847",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3340847",
abstract = "Air quality has gained much attention in recent years
and is of great importance to protecting people's
health. Due to the influence of multiple factors, the
limited air quality monitoring stations deployed in
cities are unable to provide fine-grained air quality
information. One cost-effective way is to infer air
quality with records from existing monitoring stations.
However, the severe data sparsity problem (e.g., only
0.2\% data are known) leads to the failure of most
inference methods. We observe that remote sensing data
are of high quality and have a strong correlation with
the air quality. Therefore, we propose to integrate
remote sensing data and ubiquitous urban data for the
air quality inference. But there are two main
challenges, i.e., data heterogeneity and incompleteness
of the remote sensing data. To address the challenges,
we propose a two-stage approach. In the first stage, we
infer and predict air quality conditions of some places
leveraging the remote sensing data and meteorological
data with two proposed ANN-based methods, respectively.
This stage significantly alleviates the data sparsity
problem. In the second stage, the records and estimated
air quality data are put in a tensor. A tensor
decomposition method is applied to complete the tensor.
The features extracted from urban data are classified
into the spatial features (i.e., road features and POI
features) and the temporal features (i.e.,
meteorological features) as the constraints to further
address the data sparsity problem. In addition, an
iterative training framework is proposed to improve the
inference performance. Experiments on a real-world
dataset show that our approach outperforms
state-of-the-art methods, such as U-Air.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "46",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhou:2019:PMM,
author = "Xiren Zhou and Huanhuan Chen and Jinlong Li",
title = "Probabilistic Mixture Model for Mapping the
Underground Pipes",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "47:1--47:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3344721",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3344721",
abstract = "Buried pipes beneath our city are blood vessels that
feed human civilization through the supply of water,
gas, electricity, and so on, and mapping the buried
pipes has long been addressed as an issue. In this
article, a suitable coordinate of the detected area is
established, the noisy Ground Penetrating Radar (GPR)
and Global Positioning System (GPS) data are analyzed
and normalized, and the pipeline is described
mathematically. Based on these, the Probabilistic
Mixture Model is proposed to map the buried pipes,
which takes discrete noisy GPR and GPS data as the
input and the accurate pipe locations and directions as
the output. The proposed model consists of the
Preprocessing, the Pipe Fitting algorithm, the
Classification Fitting Expectation Maximization (CFEM)
algorithm, and the Angle-limited Hough (Al-Hough)
transform. The direction information of the detecting
point is added into the measuring of the distance from
the point to nearby pipelines, to handle some areas
where the pipes are intersected or difficult to
classify. The Expectation Maximization (EM) algorithm
is upgraded to CFEM algorithm that is able to classify
detecting points into different classes, and connect
and fit multiple points in each class to get accurate
pipeline locations and directions, and the Al-Hough
transform provides reliable initializations for CFEM,
to some extent, ensuring the convergence of the
proposed model. The experimental results on the
simulated and real-world datasets demonstrate the
effectiveness of the proposed model.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "47",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2019:BMS,
author = "Fei Jiang and Guosheng Yin and Francesca Dominici",
title = "{Bayesian} Model Selection Approach to Multiple
Change-Points Detection with Non-Local Prior
Distributions",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "48:1--48:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3340804",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3340804",
abstract = "We propose a Bayesian model selection (BMS) boundary
detection procedure using non-local prior distributions
for a sequence of data with multiple systematic mean
changes. By using the non-local priors in the BMS
framework, the BMS method can effectively suppress the
non-boundary spike points with large instantaneous
changes. Further, we speedup the algorithm by reducing
the multiple change points to a series of single change
point detection problems. We establish the consistency
of the estimated number and locations of the change
points under various prior distributions. From both
theoretical and numerical perspectives, we show that
the non-local inverse moment prior leads to the fastest
convergence rate in identifying the true change points
on the boundaries. Extensive simulation studies are
conducted to compare the BMS with existing methods, and
our method is illustrated with application to the
magnetic resonance imaging guided radiation therapy
data.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "48",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2019:RTE,
author = "Yun Wang and Guojie Song and Lun Du and Zhicong Lu",
title = "Real-Time Estimation of the Urban Air Quality with
Mobile Sensor System",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "49:1--49:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3356584",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3356584",
abstract = "Recently, real-time air quality estimation has
attracted more and more attention from all over the
world, which is close to our daily life. With the
prevalence of mobile sensors, there is an emerging way
to monitor the air quality with mobile sensors on
vehicles. Compared with traditional expensive monitor
stations, mobile sensors are cheaper and more abundant,
but observations from these sensors have unstable
spatial and temporal distributions, which results in
the existing model could not work very well on this
type of data. In this article, taking advantage of air
quality data from mobile sensors, we propose a
real-time urban air quality estimation method based on
the Gaussian Process Regression for air pollution of
the unmonitored areas, pivoting on the diffusion effect
and the accumulation effect of air pollution. In order
to meet the real-time demands, we propose a two-layer
ensemble learning framework and a self-adaptivity
mechanism to improve computational efficiency and
adaptivity. We evaluate our model with real data from
mobile sensor system located in Beijing, China. And the
experiments show that our proposed model is superior to
the state-of-the-art spatial regression methods in both
precision and time performances.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "49",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xue:2019:SAP,
author = "Yu Xue and Bing Xue and Mengjie Zhang",
title = "Self-Adaptive Particle Swarm Optimization for
Large-Scale Feature Selection in Classification",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "50:1--50:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3340848",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3340848",
abstract = "Many evolutionary computation (EC) methods have been
used to solve feature selection problems and they
perform well on most small-scale feature selection
problems. However, as the dimensionality of feature
selection problems increases, the solution space
increases exponentially. Meanwhile, there are more
irrelevant features than relevant features in datasets,
which leads to many local optima in the huge solution
space. Therefore, the existing EC methods still suffer
from the problem of stagnation in local optima on
large-scale feature selection problems. Furthermore,
large-scale feature selection problems with different
datasets may have different properties. Thus, it may be
of low performance to solve different large-scale
feature selection problems with an existing EC method
that has only one candidate solution generation
strategy (CSGS). In addition, it is time-consuming to
find a suitable EC method and corresponding suitable
parameter values for a given large-scale feature
selection problem if we want to solve it effectively
and efficiently. In this article, we propose a
self-adaptive particle swarm optimization (SaPSO)
algorithm for feature selection, particularly for
large-scale feature selection. First, an encoding
scheme for the feature selection problem is employed in
the SaPSO. Second, three important issues related to
self-adaptive algorithms are investigated. After that,
the SaPSO algorithm with a typical self-adaptive
mechanism is proposed. The experimental results on 12
datasets show that the solution size obtained by the
SaPSO algorithm is smaller than its EC counterparts on
all datasets. The SaPSO algorithm performs better than
its non-EC and EC counterparts in terms of
classification accuracy not only on most training sets
but also on most test sets. Furthermore, as the
dimensionality of the feature selection problem
increases, the advantages of SaPSO become more
prominent. This highlights that the SaPSO algorithm is
suitable for solving feature selection problems,
particularly large-scale feature selection problems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "50",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Crescenzi:2019:HCM,
author = "Valter Crescenzi and Paolo Merialdo and Disheng Qiu",
title = "Hybrid Crowd-Machine Wrapper Inference",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "51:1--51:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3344720",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3344720",
abstract = "Wrapper inference deals in generating programs to
extract data from Web pages. Several supervised and
unsupervised wrapper inference approaches have been
proposed in the literature. On one hand, unsupervised
approaches produce erratic wrappers: whenever the
sources do not satisfy underlying assumptions of the
inference algorithm, their accuracy is compromised. On
the other hand, supervised approaches produce accurate
wrappers, but since they need training data, their
scalability is limited. The recent advent of
crowdsourcing platforms has opened new opportunities
for supervised approaches, as they make possible the
production of large amounts of training data with the
support of workers recruited online. Nevertheless,
involving human workers has monetary costs. We present
an original hybrid crowd-machine wrapper inference
system that offers the benefits of both approaches
exploiting the cooperation of crowd workers and
unsupervised algorithms. Based on a principled
probabilistic model that estimates the quality of
wrappers, human workers are recruited only when
unsupervised wrapper induction algorithms are not able
to produce sufficiently accurate solutions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "51",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{He:2019:KSA,
author = "Kun He and Pan Shi and David Bindel and John E.
Hopcroft",
title = "{Krylov} Subspace Approximation for Local Community
Detection in Large Networks",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "52:1--52:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3340708",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3340708",
abstract = "Community detection is an important information mining
task to uncover modular structures in large networks.
For increasingly common large network datasets, global
community detection is prohibitively expensive, and
attention has shifted to methods that mine local
communities, i.e., identifying all latent members of a
particular community from a few labeled seed members.
To address such semi-supervised mining task, we
systematically develop a local spectral (LOSP)
subspace-based community detection method, called LOSP.
We define a family of LOSP subspaces based on Krylov
subspaces, and seek a sparse indicator for the target
community via an $ l_1 $ norm minimization over the
Krylov subspace. Variants of LOSP depend on type of
random walks with different diffusion speeds, type of
random walks, dimension of the LOSP subspace, and step
of diffusions. The effectiveness of the proposed LOSP
approach is theoretically analyzed based on Rayleigh
quotients, and it is experimentally verified on a wide
variety of real-world networks across social,
production, and biological domains, as well as on an
extensive set of synthetic LFR benchmark datasets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "52",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bergamini:2019:CTK,
author = "Elisabetta Bergamini and Michele Borassi and Pierluigi
Crescenzi and Andrea Marino and Henning Meyerhenke",
title = "Computing top-$k$ Closeness Centrality Faster in
Unweighted Graphs",
journal = j-TKDD,
volume = "13",
number = "5",
pages = "53:1--53:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3344719",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:02 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3344719",
abstract = "Given a connected graph $ G = (V, E) $, where $V$
denotes the set of nodes and $E$ the set of edges of
the graph, the length (that is, the number of edges) of
the shortest path between two nodes $v$ and $w$ is
denoted by $ d(v, w)$. The closeness centrality of a
vertex $v$ is then defined as $ (n - 1) / \sum_{w \in
V} d(v, w)$, where $ n = | V |$. This measure is widely
used in the analysis of real-world complex networks,
and the problem of selecting the $k$ most central
vertices has been deeply analyzed in the last decade.
However, this problem is computationally not easy,
especially for large networks: in the first part of the
article, we prove that it is not solvable in time $ O(|
E |^{2 - \epsilon })$ on directed graphs, for any
constant $ \epsilon > 0$, under reasonable complexity
assumptions. Furthermore, we propose a new algorithm
for selecting the $k$ most central nodes in a graph: we
experimentally show that this algorithm improves
significantly both the textbook algorithm, which is
based on computing the distance between all pairs of
vertices, and the state of the art. For example, we are
able to compute the top $k$ nodes in few dozens of
seconds in real-world networks with millions of nodes
and edges. Finally, as a case study, we compute the 10
most central actors in the Internet Movie Database
(IMDB) collaboration network, where two actors are
linked if they played together in a movie, and in the
Wikipedia citation network, which contains a directed
edge from a page $p$ to a page $q$ if $p$ contains a
link to $q$.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "53",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Tatti:2019:DFG,
  author = {Nikolaj Tatti},
  title = {Density-Friendly Graph Decomposition},
  journal = j-TKDD,
  volume = {13},
  number = {5},
  pages = {54:1--54:??},
  month = oct,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3344210},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3344210},
  abstract = {Decomposing a graph into a hierarchical structure via
    $k$-core analysis is a standard operation in any modern
    graph-mining toolkit. $k$-core decomposition is a
    simple and efficient method that allows to analyze a
    graph beyond its mere degree distribution. More
    specifically, it is used to identify areas in the graph
    of increasing centrality and connectedness, and it
    allows to reveal the structural organization of the
    graph. Despite the fact that $k$-core analysis relies
    on vertex degrees, $k$-cores do not satisfy a certain,
    rather natural, density property. Simply put, the most
    central $k$-core is not necessarily the densest
    subgraph. This inconsistency between $k$-cores and
    graph density provides the basis of our study. We start
    by defining what it means for a subgraph to be locally
    dense, and we show that our definition entails a nested
    chain decomposition of the graph, similar to the one
    given by $k$-cores, but in this case the components are
    arranged in order of increasing density. We show that
    such a locally dense decomposition for a graph $ G =
    (V, E)$ can be computed in polynomial time. The running
    time of the exact decomposition algorithm is $ O(| V
    |^2 | E |)$ but is significantly faster in practice. In
    addition, we develop a linear-time algorithm that
    provides a factor-2 approximation to the optimal
    locally dense decomposition. Furthermore, we show that
    the $k$-core decomposition is also a factor-2
    approximation, however, as demonstrated by our
    experimental evaluation, in practice $k$-cores have
    different structure than locally dense subgraphs, and
    as predicted by the theory, $k$-cores are not always
    well-aligned with graph density.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {54},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhu:2019:AAL,
author = "Peisong Zhu and Zhuang Chen and Haojie Zheng and
Tieyun Qian",
title = "Aspect Aware Learning for Aspect Category Sentiment
Analysis",
journal = j-TKDD,
volume = "13",
number = "6",
pages = "55:1--55:??",
month = dec,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3350487",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:03 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3350487",
abstract = "Aspect category sentiment analysis (ACSA) is an
underexploited subtask in aspect level sentiment
analysis. It aims to identify the sentiment of
predefined aspect categories. The main challenge in
ACSA comes from the fact that the aspect category may
not occur in the sentence in most of the cases. For
example, the review ``they have delicious sandwiches''
positively talks about the aspect category ``food'' in
an implicit manner. In this article, we propose a
novel aspect aware learning (AAL) framework for ACSA
tasks. Our key idea is to exploit the interaction
between the aspect category and the contents under the
guidance of both sentiment polarity and predefined
categories. To this end, we design a two-way memory
network for integrating AAL into the framework of
sentiment classification. We further present two
algorithms to incorporate the potential impacts of
aspect categories. One is to capture the correlations
between aspect terms and the aspect category like
``sandwiches'' and ``food.'' The other is to recognize
the aspect category for sentiment representations like
``food'' for ``delicious.'' We conduct extensive
experiments on four SemEval datasets. The results
reveal the essential role of AAL in ACSA by achieving
the state-of-the-art performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "55",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Wang:2019:UFM,
  author = {Yuandong Wang and Xuelian Lin and Hua Wei and Tianyu
    Wo and Zhou Huang and Yong Zhang and Jie Xu},
  title = {A Unified Framework with Multi-source Data for
    Predicting Passenger Demands of Ride Services},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {56:1--56:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3355563},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3355563},
  abstract = {Ride-hailing applications have been offering
    convenient ride services for people in need. However,
    such applications still suffer from the issue of
    supply-demand disequilibrium, which is a typical
    problem for traditional taxi services. With effective
    predictions on passenger demands, we can alleviate the
    disequilibrium by pre-dispatching, dynamic pricing or
    avoiding dispatching cars to zero-demand areas.
    Existing studies of demand predictions mainly utilize
    limited data sources, trajectory data, or orders of
    ride services or both of them, which also lacks a
    multi-perspective consideration. In this article, we
    present a unified framework with a new combined model
    and a road-network-based spatial partition to leverage
    multi-source data and model the passenger demands from
    temporal, spatial, and zero-demand-area perspectives.
    In addition, our framework realizes offline training
    and online predicting, which can satisfy the real-time
    requirement more easily. We analyze and evaluate the
    performance of our combined model using the actual
    operational data from UCAR. The experimental results
    indicate that our model outperforms baselines on both
    Mean Absolute Error and Root Mean Square Error on
    average.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {56},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Liu:2019:CLL,
  author = {Shenghua Liu and Huawei Shen and Houdong Zheng and
    Xueqi Cheng and Xiangwen Liao},
  title = {{CT LIS}: Learning Influences and Susceptibilities
    through Temporal Behaviors},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {57:1--57:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3363570},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3363570},
  abstract = {How to quantify influences between users, seeing that
    social network users influence each other in their
    temporal behaviors? Previous work has directly defined
    an independent model parameter to capture the
    interpersonal influence between each pair of users. To
    do so, these models need a parameter for each pair of
    users, which results in high-dimensional models
    becoming easily trapped into the overfitting problem.
    However, such models do not consider how influences
    depend on each other if influences are sent from the
    same user or if influences are received by the same
    user. Therefore, we propose a model that defines
    parameters for every user with a latent influence
    vector and a susceptibility vector, opposite to define
    influences on user pairs. Such low-dimensional
    representations naturally cause the interpersonal
    influences involving the same user to be coupled with
    each other, thus reducing the model's complexity.
    Additionally, the model can easily consider the
    temporal information and sentimental polarities of
    users' messages. Finally, we conduct extensive
    experiments on two real-world Microblog datasets,
    showing that our model with such representations
    achieves best performance on three prediction tasks,
    compared to the state-of-the-art and pair-wise
    baselines.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {57},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Wu:2019:HUI,
  author = {Jimmy Ming-Tai Wu and Jerry Chun-Wei Lin and Ashish
    Tamrakar},
  title = {High-Utility Itemset Mining with Effective Pruning
    Strategies},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {58:1--58:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3363571},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3363571},
  abstract = {High-utility itemset mining is a popular data mining
    problem that considers utility factors, such as
    quantity and unit profit of items besides frequency
    measure from the transactional database. It helps to
    find the most valuable and profitable products/items
    that are difficult to track by using only the frequent
    itemsets. An item might have a high-profit value which
    is rare in the transactional database and has a
    tremendous importance. While there are many existing
    algorithms to find high-utility itemsets (HUIs) that
    generate comparatively large candidate sets, our main
    focus is on significantly reducing the computation time
    with the introduction of new pruning strategies. The
    designed pruning strategies help to reduce the
    visitation of unnecessary nodes in the search space,
    which reduces the time required by the algorithm. In
    this article, two new stricter upper bounds are
    designed to reduce the computation time by refraining
    from visiting unnecessary nodes of an itemset. Thus,
    the search space of the potential HUIs can be greatly
    reduced, and the mining procedure of the execution time
    can be improved. The proposed strategies can also
    significantly minimize the transaction database
    generated on each node. Experimental results showed
    that the designed algorithm with two pruning strategies
    outperform the state-of-the-art algorithms for mining
    the required HUIs in terms of runtime and number of
    revised candidates. The memory usage of the designed
    algorithm also outperforms the state-of-the-art
    approach. Moreover, a multi-thread concept is also
    discussed to further handle the problem of big
    datasets.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {58},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Henzgen:2019:MRD,
  author = {Sascha Henzgen and Eyke H{\"u}llermeier},
  title = {Mining Rank Data},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {59:1--59:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3363572},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3363572},
  abstract = {The problem of frequent pattern mining has been
    studied quite extensively for various types of data,
    including sets, sequences, and graphs. Somewhat
    surprisingly, another important type of data, namely
    rank data, has received very little attention in data
    mining so far. In this article, we therefore address
    the problem of mining rank data, that is, data in the
    form of rankings (total orders) of an underlying set of
    items. More specifically, two types of patterns are
    considered, namely frequent rankings and dependencies
    between such rankings in the form of association rules.
    Algorithms for mining frequent rankings and frequent
    closed rankings are proposed and tested experimentally,
    using both synthetic and real data.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {59},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Roseberry:2019:MLP,
  author = {Martha Roseberry and Bartosz Krawczyk and Alberto
    Cano},
  title = {Multi-Label Punitive {kNN} with Self-Adjusting Memory
    for Drifting Data Streams},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {60:1--60:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3363573},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3363573},
  abstract = {In multi-label learning, data may simultaneously
    belong to more than one class. When multi-label data
    arrives as a stream, the challenges associated with
    multi-label learning are joined by those of data stream
    mining, including the need for algorithms that are fast
    and flexible, able to match both the speed and evolving
    nature of the stream. This article presents a punitive
    $k$ nearest neighbors algorithm with a self-adjusting
    memory (MLSAMPkNN) for multi-label, drifting data
    streams. The memory adjusts in size to contain only the
    current concept and a novel punitive system identifies
    and penalizes errant data examples early, removing them
    from the window. By retaining and using only data that
    are both current and beneficial, MLSAMPkNN is able to
    adapt quickly and efficiently to changes within the
    data stream while still maintaining a low computational
    complexity. Additionally, the punitive removal
    mechanism offers increased robustness to various
    data-level difficulties present in data streams, such
    as class imbalance and noise. The experimental study
    compares the proposal to 24 algorithms using 30
    real-world and 15 artificial multi-label data streams
    on six multi-label metrics, evaluation time, and memory
    consumption. The superior performance of the proposed
    method is validated through non-parametric statistical
    analysis, proving both high accuracy and low time
    complexity. MLSAMPkNN is a versatile classifier,
    capable of returning excellent performance in diverse
    stream scenarios.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {60},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Lei:2019:IRU,
  author = {Yu Lei and Wenjie Li},
  title = {Interactive Recommendation with User-Specific Deep
    Reinforcement Learning},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {61:1--61:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3359554},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3359554},
  abstract = {In this article, we study a multi-step interactive
    recommendation problem for explicit-feedback
    recommender systems. Different from the existing works,
    we propose a novel user-specific deep reinforcement
    learning approach to the problem. Specifically, we
    first formulate the problem of interactive
    recommendation for each target user as a Markov
    decision process (MDP). We then derive a multi-MDP
    reinforcement learning task for all involved users. To
    model the possible relationships (including
    similarities and differences) between different users'
    MDPs, we construct user-specific latent states by using
    matrix factorization. After that, we propose a
    user-specific deep Q-learning (UDQN) method to estimate
    optimal policies based on the constructed user-specific
    latent states. Furthermore, we propose Biased UDQN
    (BUDQN) to explicitly model user-specific information
    by employing an additional bias parameter when
    estimating the Q-values for different users. Finally,
    we validate the effectiveness of our approach by
    comprehensive experimental results and analysis.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {61},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@Article{Lee:2019:AMG,
author = "John Boaz Lee and Ryan A. Rossi and Sungchul Kim and
Nesreen K. Ahmed and Eunyee Koh",
title = "Attention Models in Graphs: a Survey",
journal = j-TKDD,
volume = "13",
number = "6",
pages = "62:1--62:??",
month = dec,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3363574",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Dec 18 14:31:03 MST 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3363574",
abstract = "Graph-structured data arise naturally in many
different application domains. By representing data as
graphs, we can capture entities (i.e., nodes) as well
as their relationships (i.e., edges) with each other.
Many useful insights can be derived from
graph-structured data as demonstrated by an
ever-growing body of work focused on graph mining.
However, in the real-world, graphs can be both
large---with many complex patterns---and noisy, which
can pose a problem for effective graph mining. An
effective way to deal with this issue is to incorporate
``attention'' into graph mining solutions. An attention
mechanism allows a method to focus on task-relevant
parts of the graph, helping it to make better
decisions. In this work, we conduct a comprehensive and
focused survey of the literature on the emerging field
of graph attention models. We introduce three intuitive
taxonomies to group existing work. These are based on
problem setting (type of input and output), the type of
attention mechanism used, and the task (e.g., graph
classification, link prediction). We motivate our
taxonomies through detailed examples and use each to
survey competing approaches from a unique standpoint.
Finally, we highlight several challenges in the area
and discuss promising directions for future work.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "62",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Yang:2019:PCM,
  author = {Wangdong Yang and Kenli Li and Keqin Li},
  title = {A Pipeline Computing Method of {SpTV} for Three-Order
    Tensors on {CPU} and {GPU}},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {63:1--63:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3363575},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3363575},
  abstract = {Tensors have drawn a growing attention in many
    applications, such as physics, engineering science,
    social networks, recommended systems. Tensor
    decomposition is the key to explore the inherent
    intrinsic data relationship of tensor. There are many
    sparse tensor and vector multiplications (SpTV) in
    tensor decomposition. We analyze a variety of storage
    formats of sparse tensors and develop a piecewise
    compression strategy to improve the storage efficiency
    of large sparse tensors. This compression strategy can
    avoid storing a large number of empty slices and empty
    fibers in sparse tensors, and thus the storage space is
    significantly reduced. A parallel algorithm for the
    SpTV based on the high-order compressed format based on
    slices is designed to greatly improve its computing
    performance on graphics processing unit. Each tensor is
    cut into multiple slices to form a series of sparse
    matrix and vector multiplications, which form the
    pipelined parallelism. The transmission time of the
    slices can be hidden through pipelined parallel to
    further optimize the performance of the SpTV.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {63},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Zhou:2019:RMS,
  author = {Yu Zhou and Jianbin Huang and Heli Sun and Yizhou Sun
    and Shaojie Qiao and Stephen Wambura},
  title = {Recurrent Meta-Structure for Robust Similarity Measure
    in Heterogeneous Information Networks},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {64:1--64:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3364226},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3364226},
  abstract = {Similarity measure is one of the fundamental task in
    heterogeneous information network (HIN) analysis. It
    has been applied to many areas, such as product
    recommendation, clustering, and Web search. Most of the
    existing metrics can provide personalized services for
    users by taking a meta-path or meta-structure as input.
    However, these metrics may highly depend on the
    user-specified meta-path or meta-structure. In
    addition, users must know how to select an appropriate
    meta-path or meta-structure. In this article, we
    propose a novel similarity measure in HINs, called
    Recurrent Meta-Structure (RecurMS)-based Similarity
    (RMSS). The RecurMS as a schematic structure in HINs
    provides a unified framework for integrating all of the
    meta-paths and meta-structures, and can be constructed
    automatically by means of repetitively traversing the
    network schema. In order to formalize the semantics,
    the RecurMS is decomposed into several recurrent
    meta-paths and recurrent meta-trees, and we then define
    the commuting matrices of the recurrent meta-paths and
    meta-trees. All of these commuting matrices are
    combined together according to different weights. We
    propose two kinds of weighting strategies to determine
    the weights. The first is called the local weighting
    strategy that depends on the sparsity of the commuting
    matrices, and the second is called the global weighting
    strategy that depends on the strength of the commuting
    matrices. As a result, RMSS is defined by means of the
    weighted summation of the commuting matrices. Note that
    RMSS can also provide personalized services for users
    by means of the weights of the recurrent meta-paths and
    meta-trees. Experimental evaluations show that the
    proposed RMSS is robust and outperforms the existing
    metrics in terms of ranking and clustering task.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {64},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Veloso:2019:SSM,
  author = {Br{\'a}ulio M. Veloso and Renato M. Assun{\c{c}}{\~a}o
    and Anderson A. Ferreira and Nivio Ziviani},
  title = {In Search of a Stochastic Model for the E-News
    Reader},
  journal = j-TKDD,
  volume = {13},
  number = {6},
  pages = {65:1--65:??},
  month = dec,
  year = {2019},
  coden = {????},
  doi = {https://doi.org/10.1145/3362695},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Wed Dec 18 14:31:03 MST 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3362695},
  abstract = {E-news readers have increasingly at their disposal a
    broad set of news articles to read. Online newspaper
    sites use recommender systems to predict and to offer
    relevant articles to their users. Typically, these
    recommender systems do not leverage users' reading
    behavior. If we know how the topics-reads change in a
    reading session, we may lead to fine-tuned
    recommendations, for example, after reading a certain
    number of sports items, it may be counter-productive to
    keep recommending other sports news. The motivation for
    this article is the assumption that understanding user
    behavior when reading successive online news articles
    can help in developing better recommender systems. We
    propose five categories of stochastic models to
    describe this behavior depending on how the previous
    reading history affects the future choices of topics.
    We instantiated these five classes with many different
    stochastic processes covering short-term memory,
    revealed-preference, cumulative advantage, and
    geometric sojourn models. Our empirical study is based
    on large datasets of E-news from two online newspapers.
    We collected data from more than 13 million users who
    generated more than 23 million reading sessions, each
    one composed by the successive clicks of the users on
    the posted news. We reduce each user session to the
    sequence of reading news topics. The models were fitted
    and compared using the Akaike Information Criterion and
    the Brier Score. We found that the best models are
    those in which the user moves through topics influenced
    only by their most recent readings. Our models were
    also better to predict the next reading than the
    recommender systems currently used in these journals
    showing that our models can improve user
    satisfaction.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {65},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Hasan:2020:NSA,
  author = {Md Kamrul Hasan and Christopher Pal},
  title = {A New Smooth Approximation to the Zero One Loss with a
    Probabilistic Interpretation},
  journal = j-TKDD,
  volume = {14},
  number = {1},
  pages = {1:1--1:28},
  month = feb,
  year = {2020},
  coden = {????},
  doi = {https://doi.org/10.1145/3365672},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Thu Feb 6 07:36:59 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/abs/10.1145/3365672},
  abstract = {We examine a new form of smooth approximation to the
    zero one loss in which learning is performed using a
    reformulation of the widely used logistic function. Our
    approach is based on using the posterior mean of a
    novel generalized Beta-Bernoulli \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {1},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Mitra:2020:UMV,
  author = {Sayantan Mitra and Mohammed Hasanuzzaman and Sriparna
    Saha},
  title = {A Unified Multi-view Clustering Algorithm Using
    Multi-objective Optimization Coupled with Generative
    Model},
  journal = j-TKDD,
  volume = {14},
  number = {1},
  pages = {2:1--2:31},
  month = feb,
  year = {2020},
  coden = {????},
  doi = {https://doi.org/10.1145/3365673},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Thu Feb 6 07:36:59 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/abs/10.1145/3365673},
  abstract = {There is a large body of works on multi-view
    clustering that exploit multiple representations (or
    views) of the same input data for better convergence.
    These multiple views can come from multiple modalities
    (image, audio, text) or different feature \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {2},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Ni:2020:LOC,
  author = {Li Ni and Wenjian Luo and Wenjie Zhu and Bei Hua},
  title = {Local Overlapping Community Detection},
  journal = j-TKDD,
  volume = {14},
  number = {1},
  pages = {3:1--3:25},
  month = feb,
  year = {2020},
  coden = {????},
  doi = {https://doi.org/10.1145/3361739},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Thu Feb 6 07:36:59 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/abs/10.1145/3361739},
  abstract = {Local community detection refers to finding the
    community that contains the given node based on local
    information, which becomes very meaningful when global
    information about the network is unavailable or
    expensive to acquire. Most studies on local \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {3},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Angiulli:2020:CCF,
  author = {Fabrizio Angiulli},
  title = {{CFOF}: a Concentration Free Measure for Anomaly
    Detection},
  journal = j-TKDD,
  volume = {14},
  number = {1},
  pages = {4:1--4:53},
  month = feb,
  year = {2020},
  coden = {????},
  doi = {https://doi.org/10.1145/3362158},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Thu Feb 6 07:36:59 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/abs/10.1145/3362158},
  abstract = {We present a novel notion of outlier, called the
    Concentration Free Outlier Factor, or CFOF. As a main
    contribution, we formalize the notion of concentration
    of outlier scores and theoretically prove that CFOF
    does not concentrate in the Euclidean space \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {4},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Arifuzzaman:2020:FPA,
  author = {Shaikh Arifuzzaman and Maleq Khan and Madhav Marathe},
  title = {Fast Parallel Algorithms for Counting and Listing
    Triangles in Big Graphs},
  journal = j-TKDD,
  volume = {14},
  number = {1},
  pages = {5:1--5:34},
  month = feb,
  year = {2020},
  coden = {????},
  doi = {https://doi.org/10.1145/3365676},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Thu Feb 6 07:36:59 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/abs/10.1145/3365676},
  abstract = {Big graphs (networks) arising in numerous application
    areas pose significant challenges for graph analysts as
    these graphs grow to billions of nodes and edges and
    are prohibitively large to fit in the main memory.
    Finding the number of triangles in a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {5},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@article{Kuang:2020:TEE,
  author = {Kun Kuang and Peng Cui and Bo Li and Meng Jiang and
    Yashen Wang and Fei Wu and Shiqiang Yang},
  title = {Treatment Effect Estimation via Differentiated
    Confounder Balancing and Regression},
  journal = j-TKDD,
  volume = {14},
  number = {1},
  pages = {6:1--6:25},
  month = feb,
  year = {2020},
  coden = {????},
  doi = {https://doi.org/10.1145/3365677},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Thu Feb 6 07:36:59 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/abs/10.1145/3365677},
  abstract = {Treatment effect plays an important role on decision
    making in many fields, such as social marketing,
    healthcare, and public policy. The key challenge on
    estimating treatment effect in the wild observational
    studies is to handle confounding bias induced
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {6},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}
@Article{Jan:2020:ECC,
author = "Zohaib Md. Jan and Brijesh Verma",
title = "Evolutionary Classifier and Cluster Selection Approach
for Ensemble Classification",
journal = j-TKDD,
volume = "14",
number = "1",
pages = "7:1--7:18",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3366633",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 6 07:36:59 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3366633",
abstract = "Ensemble classifiers improve the classification
performance by combining several classifiers using a
suitable fusion methodology. Many ensemble classifier
generation methods have been developed that allowed the
training of multiple classifiers on a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Queiroz-Sousa:2020:ROT,
author = "Paulo Orlando Queiroz-Sousa and Ana Carolina Salgado",
title = "A Review on {OLAP} Technologies Applied to Information
Networks",
journal = j-TKDD,
volume = "14",
number = "1",
pages = "8:1--8:25",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3370912",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 6 07:36:59 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3370912",
abstract = "Many real systems produce network data or highly
interconnected data, which can be called information
networks. These information networks form a critical
component in modern information infrastructure,
constituting a large graph data volume. The \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Nie:2020:ALL,
author = "Feiping Nie and Zheng Wang and Rong Wang and Zhen Wang
and Xuelong Li",
title = "Adaptive Local Linear Discriminant Analysis",
journal = j-TKDD,
volume = "14",
number = "1",
pages = "9:1--9:19",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369870",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 6 07:36:59 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369870",
abstract = "Dimensionality reduction plays a significant role in
high-dimensional data processing, and Linear
Discriminant Analysis (LDA) is a widely used supervised
dimensionality reduction approach. However, a major
drawback of LDA is that it is incapable of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lu:2020:ILS,
author = "Xinjiang Lu and Zhiwen Yu and Chuanren Liu and Yanchi
Liu and Hui Xiong and Bin Guo",
title = "Inferring Lifetime Status of Point-of-Interest: a
Multitask Multiclass Approach",
journal = j-TKDD,
volume = "14",
number = "1",
pages = "10:1--10:27",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369799",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 6 07:36:59 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369799",
abstract = "A Point-of-Interest (POI) refers to a specific
location that people may find useful or interesting. In
modern cities, a large number of POIs emerge, grow,
stabilize for a period, then finally disappear. The
stages (e.g., emerge and grow) in this process
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Galimberti:2020:CDM,
author = "Edoardo Galimberti and Francesco Bonchi and Francesco
Gullo and Tommaso Lanciano",
title = "Core Decomposition in Multilayer Networks: Theory,
Algorithms, and Applications",
journal = j-TKDD,
volume = "14",
number = "1",
pages = "11:1--11:40",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369872",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Thu Feb 6 07:36:59 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369872",
abstract = "Multilayer networks are a powerful paradigm to model
complex systems, where multiple relations occur between
the same entities. Despite the keen interest in a
variety of tasks, algorithms, and analyses in this type
of network, the problem of extracting \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shin:2020:FAP,
author = "Kijung Shin and Sejoon Oh and Jisu Kim and Bryan Hooi
and Christos Faloutsos",
title = "Fast, Accurate and Provable Triangle Counting in Fully
Dynamic Graph Streams",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "12:1--12:39",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375392",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375392",
abstract = "Given a stream of edge additions and deletions, how
can we estimate the count of triangles in it? If we can
store only a subset of the edges, how can we obtain
unbiased estimates with small variances? Counting
triangles (i.e., cliques of size three) in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Han:2020:GLS,
author = "Huimei Han and Xingquan Zhu and Ying Li",
title = "Generalizing Long Short-Term Memory Network for Deep
Learning from Generic Data",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "13:1--13:28",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3366022",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3366022",
abstract = "Long Short-Term Memory (LSTM) network, a popular
deep-learning model, is particularly useful for data
with temporal correlation, such as texts, sequences, or
time series data, thanks to its well-sought after
recurrent network structures designed to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2020:BCR,
author = "Chi-Chun Lin and Kun-Ta Chuang and Wush Chi-Hsuan Wu
and Ming-Syan Chen",
title = "Budget-Constrained Real-Time Bidding Optimization:
Multiple Predictors Make It Better",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "14:1--14:27",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375393",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375393",
abstract = "In this article, we pursue a better solution for the
promising problem, i.e., the bidding strategy design,
in the real-time bidding (RTB) advertising (AD)
environment. Under the budget constraint, the design of
an optimal strategy for bidding on each \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yan:2020:MTI,
author = "Xiaoqiang Yan and Zhengzheng Lou and Shizhe Hu and
Yangdong Ye",
title = "Multi-task Information Bottleneck Co-clustering for
Unsupervised Cross-view Human Action Categorization",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "15:1--15:23",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375394",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375394",
abstract = "The widespread adoption of low-cost cameras generates
massive amounts of videos recorded from different
viewpoints every day. To cope with this vast amount of
unlabeled and heterogeneous data, a new multi-task
information bottleneck co-clustering (MIBC) \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2020:BRP,
author = "Bo Lin and Wei Luo and Zhiling Luo and Bo Wang and
Shuiguang Deng and Jianwei Yin and Mengchu Zhou",
title = "Bradykinesia Recognition in {Parkinson}'s Disease via
Single {RGB} Video",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "16:1--16:19",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369438",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369438",
abstract = "Parkinson's disease is a progressive nervous system
disorder afflicting millions of patients. Among its
motor symptoms, bradykinesia is one of the cardinal
manifestations. Experienced doctors are required for
the clinical diagnosis of bradykinesia, but \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2020:RTT,
author = "Shuai Liu and Guojie Song and Wenhao Huang",
title = "Real-time Transportation Prediction Correction using
Reconstruction Error in Deep Learning",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "17:1--17:20",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369871",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369871",
abstract = "In online complex systems such as transportation
system, an important work is real-time traffic
prediction. Due to the data shift, data model
inconsistency, and sudden change of traffic patterns
(like transportation accident), the prediction result
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "17",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Dai:2020:CVE,
author = "Chenglong Dai and Dechang Pi and Stefanie I. Becker
and Jia Wu and Lin Cui and Blake Johnson",
title = "{CenEEGs}: Valid {EEG} Selection for Classification",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "18:1--18:25",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3371153",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3371153",
abstract = "This article explores valid brain
electroencephalography (EEG) selection for EEG
classification with different classifiers, which has
been rarely addressed in previous studies and is mostly
ignored by existing EEG processing methods and
applications. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "18",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Dong:2020:RCP,
author = "Jialin Dong and Kai Yang and Yuanming Shi",
title = "Ranking from Crowdsourced Pairwise Comparisons via
Smoothed {Riemannian} Optimization",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "19:1--19:26",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372407",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372407",
abstract = "Social Internet of Things has recently become a
promising paradigm for augmenting the capability of
humans and devices connected in the networks to provide
services. In social Internet of Things network,
crowdsourcing that collects the intelligence of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "19",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2020:ANA,
author = "Yanan Xu and Yanyan Shen and Yanmin Zhu and Jiadi Yu",
title = "{AR$^2$Net}: an Attentive Neural Approach for Business
Location Selection with Satellite Data and Urban Data",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "20:1--20:28",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372406",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372406",
abstract = "Business location selection is crucial to the success
of businesses. Traditional approaches like manual
survey investigate multiple factors, such as foot
traffic, neighborhood structure, and available
workforce, which are typically hard to measure. In
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "20",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Concas:2020:MSM,
author = "Francesco Concas and Pengfei Xu and Mohammad A. Hoque
and Jiaheng Lu and Sasu Tarkoma",
title = "Multiple Set Matching with {Bloom} Matrix and {Bloom}
Vector",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "21:1--21:21",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372409",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372409",
abstract = "Bloom Filter is a space-efficient probabilistic data
structure for checking the membership of elements in a
set. Given multiple sets, a standard Bloom Filter is
not sufficient when looking for the items to which an
element or a set of input elements \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "21",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2020:CDM,
author = "Pei-Zhen Li and Ling Huang and Chang-Dong Wang and
Jian-Huang Lai and Dong Huang",
title = "Community Detection by Motif-Aware Label Propagation",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "22:1--22:19",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3378537",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3378537",
abstract = "Community detection (or graph clustering) is crucial
for unraveling the structural properties of complex
networks. As an important technique in community
detection, label propagation has shown the advantage of
finding a good community structure with \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "22",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Guo:2020:NAF,
author = "Yuan Guo and Yu Sun and Kai Wu and Kerong Jiang",
title = "New Algorithms of Feature Selection and Big Data
Assignment for {CBR} System Integrated by {Bayesian}
Network",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "23:1--23:20",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3373086",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3373086",
abstract = "Under big data, the integrated system of case-based
reasoning and Bayesian network has exhibited great
advantage in implementing the intelligence of
engineering application in many domains. To further
improve the performance of the hybrid system, this
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "23",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hua:2020:PTM,
author = "Ting Hua and Chang-Tien Lu and Jaegul Choo and Chandan
K. Reddy",
title = "Probabilistic Topic Modeling for Comparative Analysis
of Document Collections",
journal = j-TKDD,
volume = "14",
number = "2",
pages = "24:1--24:27",
month = mar,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369873",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Mar 10 08:50:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369873",
abstract = "Probabilistic topic models, which can discover hidden
patterns in documents, have been extensively studied.
However, rather than learning from a single document
collection, numerous real-world applications demand a
comprehensive understanding of the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "24",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lee:2020:LDS,
author = "Kwang Hee Lee and Myoung Ho Kim",
title = "Linearization of Dependency and Sampling for
Participation-based Betweenness Centrality in Very
Large {$B$}-hypergraphs",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "25:1--25:41",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375399",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375399",
abstract = "A B-hypergraph consisting of nodes and directed
hyperedges is a generalization of the directed graph. A
directed hyperedge in the B-hypergraph represents a
relation from a set of source nodes to a single
destination node. We suggest one possible \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "25",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bian:2020:MSM,
author = "Jiang Bian and Haoyi Xiong and Yanjie Fu and Jun Huan
and Zhishan Guo",
title = "{MP$^2$SDA}: Multi-Party Parallelized Sparse
Discriminant Learning",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "26:1--26:22",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3374919",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3374919",
abstract = "Sparse Discriminant Analysis (SDA) has been widely
used to improve the performance of classical Fisher's
Linear Discriminant Analysis in supervised metric
learning, feature selection, and classification. With
the increasing needs of distributed data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "26",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tang:2020:ERF,
author = "Lei Tang and Zihang Liu and Yaling Zhao and Zongtao
Duan and Jingchi Jia",
title = "Efficient Ridesharing Framework for Ride-matching via
Heterogeneous Network Embedding",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "27:1--27:24",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3373839",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3373839",
abstract = "Ridesharing has attracted increasing attention in
recent years, and combines the flexibility and speed of
private cars with the reduced cost of fixed-line
systems to benefit alleviating traffic pressure. A
major issue in ridesharing is the accurate \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "27",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ermis:2020:DSD,
author = "Beyza Ermis and A. Taylan Cemgil",
title = "Data Sharing via Differentially Private Coupled Matrix
Factorization",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "28:1--28:27",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372408",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372408",
abstract = "We address the privacy-preserving data-sharing problem
in a distributed multiparty setting. In this setting,
each data site owns a distinct part of a dataset and
the aim is to estimate the parameters of a statistical
model conditioned on the complete \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "28",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2020:CIM,
author = "Yu Yang and Xiangbo Mao and Jian Pei and Xiaofei He",
title = "Continuous Influence Maximization",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "29:1--29:38",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3380928",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3380928",
abstract = "Imagine we are introducing a new product through a
social network, where we know for each user in the
network the function of purchase probability with
respect to discount. Then, what discounts should we
offer to those social network users so that, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "29",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ostovar:2020:RDC,
author = "Alireza Ostovar and Sander J. J. Leemans and Marcello
{La Rosa}",
title = "Robust Drift Characterization from Event Streams of
Business Processes",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "30:1--30:57",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375398",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375398",
abstract = "Process workers may vary the normal execution of a
business process to adjust to changes in their
operational environment, e.g., changes in workload,
season, or regulations. Changes may be simple, such as
skipping an individual activity, or complex, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "30",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2020:SFE,
author = "Bang Liu and Fred X. Han and Di Niu and Linglong Kong
and Kunfeng Lai and Yu Xu",
title = "{Story Forest}: Extracting Events and Telling Stories
from Breaking News",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "31:1--31:28",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3377939",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3377939",
abstract = "Extracting events accurately from vast news corpora
and organize events logically is critical for news apps
and search engines, which aim to organize news
information collected from the Internet and present it
to users in the most sensible forms. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "31",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Akhtar:2020:DMT,
author = "Md Shad Akhtar and Dushyant Singh Chauhan and Asif
Ekbal",
title = "A Deep Multi-task Contextual Attention Framework for
Multi-modal Affect Analysis",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "32:1--32:27",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3380744",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3380744",
abstract = "Multi-modal affect analysis (e.g., sentiment and
emotion analysis) is an interdisciplinary study and has
been an emerging and prominent field in Natural
Language Processing and Computer Vision. The effective
fusion of multiple modalities (e.g., text, \ldots{}).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "32",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Abd-Elaziz:2020:EDM,
author = "M. M. Abd-Elaziz and Hazem M. El-Bakry and Ahmed Abou
Elfetouh and Amira Elzeiny",
title = "Enhanced Data Mining Technique to Measure Satisfaction
Degree of Social Media Users of {Xeljanz} Drug",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "33:1--33:13",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3389433",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3389433",
abstract = "In the recent times, social media has become important
in the field of health care as a major resource of
valuable health information. Social media can provide
massive amounts of data in real-time through user
interaction, and this data can be analysed \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "33",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tuomo:2020:BCC,
author = "Tuomo Alasalmi and Jaakko Suutala and Juha R{\"o}ning
and Heli Koskim{\"a}ki",
title = "Better Classifier Calibration for Small Datasets",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "34:1--34:19",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3385656",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3385656",
abstract = "Classifier calibration does not always go hand in hand
with the classifier's ability to separate the classes.
There are applications where good classifier
calibration, i.e., the ability to produce accurate
probability estimates, is more important than
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "34",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Amornbunchornvej:2020:FIF,
author = "Chainarong Amornbunchornvej and Tanya Berger-Wolf",
title = "Framework for Inferring Following Strategies from Time
Series of Movement Data",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "35:1--35:22",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3385730",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3385730",
abstract = "How do groups of individuals achieve consensus in
movement decisions? Do individuals follow their
friends, the one predetermined leader, or whomever just
happens to be nearby? To address these questions
computationally, we formalize Coordination Strategy
Inference Problem. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "35",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2020:NEC,
author = "Heli Sun and Fang He and Jianbin Huang and Yizhou Sun
and Yang Li and Chenyu Wang and Liang He and Zhongbin
Sun and Xiaolin Jia",
title = "Network Embedding for Community Detection in
Attributed Networks",
journal = j-TKDD,
volume = "14",
number = "3",
pages = "36:1--36:25",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3385415",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue May 19 09:32:05 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3385415",
abstract = "Community detection aims to partition network nodes
into a set of clusters, such that nodes are more
densely connected to each other within the same cluster
than other clusters. For attributed networks, apart
from the denseness requirement of topology \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "36",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lappas:2020:MCP,
  author          = {Theodoros Lappas},
  title           = {Mining Career Paths from Large Resume Databases: Evidence
                     from {IT} Professionals},
  journal         = j-TKDD,
  volume          = {14},
  number          = {3},
  pages           = {37:1--37:38},
  month           = may,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3379984},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3379984},
  abstract        = {The emergence of online professional platforms, such as
                     LinkedIn and Indeed, has led to unprecedented volumes of
                     rich resume data that have revolutionized the study of
                     careers. One of the most prevalent problems in this space
                     is the extraction of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {37},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhang:2020:INA,
  author          = {Si Zhang and Hanghang Tong and Jie Tang and Jiejun Xu and
                     Wei Fan},
  title           = {Incomplete Network Alignment: Problem Definitions and Fast
                     Solutions},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {38:1--38:26},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3384203},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3384203},
  abstract        = {Networks are prevalent in many areas and are often
                     collected from multiple sources. However, due to the
                     veracity characteristics, more often than not, networks
                     are incomplete. Network alignment and network completion
                     have become two fundamental \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {38},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Sun:2020:FDA,
  author =       "Bintao Sun and T.-H. Hubert Chan and Mauro Sozio",
  title =        "Fully Dynamic Approximate {$k$}-Core Decomposition in
                 Hypergraphs",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "39:1--39:21",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385416",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385416",
  abstract =     "In this article, we design algorithms to maintain
                 approximate core values in dynamic hypergraphs. This
                 notion has been well studied for normal graphs in both
                 static and dynamic setting. We generalize the problem
                 to hypergraphs when edges can be inserted \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Balasubramaniam:2020:ENT,
  author          = {Thirunavukarasu Balasubramaniam and Richi Nayak and Chau
                     Yuen},
  title           = {Efficient Nonnegative Tensor Factorization via Saturating
                     Coordinate Descent},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {40:1--40:28},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3385654},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3385654},
  abstract        = {With the advancements in computing technology and
                     web-based applications, data are increasingly generated
                     in multi-dimensional form. These data are usually sparse
                     due to the presence of a large number of users and fewer
                     user interactions. To deal with \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {40},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Kong:2020:GSS,
  author          = {Xiangjie Kong and Jun Zhang and Da Zhang and Yi Bu and
                     Ying Ding and Feng Xia},
  title           = {The Gene of Scientific Success},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {41:1--41:19},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3385530},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3385530},
  abstract        = {This article elaborates how to identify and evaluate
                     causal factors to improve scientific impact. Currently,
                     analyzing scientific impact can be beneficial to various
                     academic activities including funding application, mentor
                     recommendation, discovering \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {41},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Chen:2020:CTF,
  author          = {Cen Chen and Kenli Li and Sin G. Teo and Xiaofeng Zou and
                     Keqin Li and Zeng Zeng},
  title           = {Citywide Traffic Flow Prediction Based on Multiple Gated
                     Spatio-temporal Convolutional Neural Networks},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {42:1--42:23},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3385414},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3385414},
  abstract        = {Traffic flow prediction is crucial for public safety and
                     traffic management, and remains a big challenge because
                     of many complicated factors, e.g., multiple
                     spatio-temporal dependencies, holidays, and weather. Some
                     work leveraged 2D convolutional neural \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {42},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhu:2020:SCM,
  author          = {Tianyu Zhu and Guannan Liu and Guoqing Chen},
  title           = {Social Collaborative Mutual Learning for Item
                     Recommendation},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {43:1--43:19},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3387162},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3387162},
  abstract        = {Recommender Systems (RSs) provide users with item choices
                     based on their preferences reflected in past interactions
                     and become important tools to alleviate the information
                     overload problem for users. However, in real-world
                     scenarios, the user-item \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {43},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Constantinou:2020:LBN,
  author =       "Anthony C. Constantinou",
  title =        "Learning {Bayesian} Networks with the {Saiyan}
                 Algorithm",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "44:1--44:21",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385655",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385655",
  abstract =     "Some structure learning algorithms have proven to be
                 effective in reconstructing hypothetical Bayesian
                 Network graphs from synthetic data. However, in their
                 mission to maximise a scoring function, many become
                 conservative and minimise edges discovered. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2020:EEB,
  author          = {Changping Wang and Chaokun Wang and Zheng Wang and Xiaojun
                     Ye and Philip S. Yu},
  title           = {{Edge2vec}: Edge-based Social Network Embedding},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {45:1--45:24},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3391298},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3391298},
  abstract        = {Graph embedding, also known as network embedding and
                     network representation learning, is a useful technique
                     which helps researchers analyze information networks
                     through embedding a network into a low-dimensional space.
                     However, existing graph embedding \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {45},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhu:2020:SGC,
  author          = {Xiaofeng Zhu and Shichao Zhang and Jilian Zhang and
                     Yonggang Li and Guangquan Lu and Yang Yang},
  title           = {Sparse Graph Connectivity for Image Segmentation},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {46:1--46:19},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3397188},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3397188},
  abstract        = {It has been demonstrated that the segmentation
                     performance is highly dependent on both subspace
                     preservation and graph connectivity. In the literature,
                     the full connectivity method linearly represents each
                     data point (e.g., a pixel in one image) by all \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {46},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Marques:2020:IEU,
  author =       "Henrique O. Marques and Ricardo J. G. B. Campello and
                 J{\"o}rg Sander and Arthur Zimek",
  title =        "Internal Evaluation of Unsupervised Outlier
                 Detection",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "47:1--47:42",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3394053",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3394053",
  abstract =     "Although there is a large and growing literature that
                 tackles the unsupervised outlier detection problem, the
                 unsupervised evaluation of outlier detection results is
                 still virtually untouched in the literature. The
                 so-called internal evaluation, based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhu:2020:SWM,
  author          = {Xiaofeng Zhu and Shichao Zhang and Yonghua Zhu and Wei
                     Zheng and Yang Yang},
  title           = {Self-weighted Multi-view Fuzzy Clustering},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {48:1--48:17},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3396238},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3396238},
  abstract        = {Since the data in each view may contain distinct
                     information different from other views as well as has
                     common information for all views in multi-view learning,
                     many multi-view clustering methods have been designed to
                     use these information (including the \ldots{}).},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {48},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Das:2020:DAI,
  author          = {Shubhomoy Das and Weng-Keen Wong and Thomas Dietterich and
                     Alan Fern and Andrew Emmott},
  title           = {Discovering Anomalies by Incorporating Feedback from an
                     Expert},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {49:1--49:32},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3396608},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3396608},
  abstract        = {Unsupervised anomaly detection algorithms search for
                     outliers and then predict that these outliers are the
                     anomalies. When deployed, however, these algorithms are
                     often criticized for high false-positive and high
                     false-negative rates. One main cause of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {49},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Xu:2020:NSR,
  author          = {Yuanbo Xu and Yongjian Yang and En Wang and Jiayu Han and
                     Fuzhen Zhuang and Zhiwen Yu and Hui Xiong},
  title           = {Neural Serendipity Recommendation: Exploring the Balance
                     between Accuracy and Novelty with Sparse Explicit
                     Feedback},
  journal         = j-TKDD,
  volume          = {14},
  number          = {4},
  pages           = {50:1--50:25},
  month           = jul,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3396607},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3396607},
  abstract        = {Recommender systems have been playing an important role
                     in providing personalized information to users. However,
                     there is always a trade-off between accuracy and novelty
                     in recommender systems. Usually, many users are suffering
                     from redundant or \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {50},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Xiong:2020:ISI,
  author          = {Hui Xiong and Chih-Jen Lin},
  title           = {Introduction to the Special Issue on the Best Papers from
                     {KDD 2018}},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {51e:1--51e:2},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3407901},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3407901},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {51e},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhang:2020:TOO,
  author =       "Ping Zhang and Zhifeng Bao and Yuchen Li and Guoliang
                 Li and Yipeng Zhang and Zhiyong Peng",
  title =        "Towards an Optimal Outdoor Advertising Placement: When
                 a Budget Constraint Meets Moving Trajectories",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "51:1--51:32",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3350488",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3350488",
  abstract =     "In this article, we propose and study the problem of
                 trajectory-driven influential billboard placement:
                 given a set of billboards $U$ (each with a location and
                 a cost), a database of trajectories $T$, and a budget
                 $L$, we find a set of billboards within the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "51",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Xiao:2020:MUM,
  author          = {Keli Xiao and Zeyang Ye and Lihao Zhang and Wenjun Zhou
                     and Yong Ge and Yuefan Deng},
  title           = {Multi-User Mobile Sequential Recommendation for Route
                     Optimization},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {52:1--52:28},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3360048},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3360048},
  abstract        = {We enhance the mobile sequential recommendation (MSR)
                     model and address some critical issues in existing
                     formulations by proposing three new forms of the MSR from
                     a multi-user perspective. The multi-user MSR (MMSR) model
                     searches optimal routes for \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {52},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Huai:2020:LDM,
  author          = {Mengdi Huai and Chenglin Miao and Yaliang Li and Qiuling
                     Suo and Lu Su and Aidong Zhang},
  title           = {Learning Distance Metrics from Probabilistic Information},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {53:1--53:33},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3364320},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3364320},
  abstract        = {The goal of metric learning is to learn a good distance
                     metric that can capture the relationships among
                     instances, and its importance has long been recognized in
                     many fields. An implicit assumption in the traditional
                     settings of metric learning is that \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {53},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhu:2020:PMG,
  author          = {Hongyuan Zhu and Qi Liu and Nicholas Jing Yuan and Kun
                     Zhang and Guang Zhou and Enhong Chen},
  title           = {Pop Music Generation: From Melody to Multi-style
                     Arrangement},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {54:1--54:31},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3374915},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3374915},
  abstract        = {Music plays an important role in our daily life. With the
                     development of deep learning and modern generation
                     techniques, researchers have done plenty of works on
                     automatic music generation. However, due to the special
                     requirements of both melody and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {54},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Mautz:2020:NRS,
  author          = {Dominik Mautz and Wei Ye and Claudia Plant and Christian
                     B{\"o}hm},
  title           = {Non-Redundant Subspace Clusterings with {Nr-Kmeans} and
                     {Nr-DipMeans}},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {55:1--55:24},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3385652},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3385652},
  abstract        = {A huge object collection in high-dimensional space can
                     often be clustered in more than one way, for instance,
                     objects could be clustered by their shape or
                     alternatively by their color. Each grouping represents a
                     different view of the dataset. The new \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {55},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Riondato:2020:MMI,
  author          = {Matteo Riondato and Fabio Vandin},
  title           = {{MiSoSouP}: Mining Interesting Subgroups with Sampling and
                     Pseudodimension},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {56:1--56:31},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3385653},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3385653},
  abstract        = {We present MiSoSouP, a suite of algorithms for extracting
                     high-quality approximations of the most interesting
                     subgroups, according to different popular interestingness
                     measures, from a random sample of a transactional
                     dataset. We describe a new \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {56},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zugner:2020:AAG,
  author          = {Daniel Z{\"u}gner and Oliver Borchert and Amir Akbarnejad
                     and Stephan G{\"u}nnemann},
  title           = {Adversarial Attacks on Graph Neural Networks:
                     Perturbations and their Patterns},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {57:1--57:31},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3394520},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3394520},
  abstract        = {Deep learning models for graphs have achieved strong
                     performance for the task of node classification. Despite
                     their proliferation, little is known about their
                     robustness to adversarial attacks. Yet, in domains where
                     they are likely to be used, e.g., the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {57},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhou:2020:EAK,
  author =       "Xu Zhou and Kenli Li and Zhibang Yang and Yunjun Gao
                 and Keqin Li",
  title =        "Efficient Approaches to {$k$} Representative
                 {G-Skyline} Queries",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "58:1--58:27",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3397503",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3397503",
  abstract =     "The G-Skyline (GSky) query is a powerful tool to
                 analyze optimal groups in decision support. Compared
                 with other group skyline queries, it releases users
                 from providing an aggregate function. Besides, it can
                 get much comprehensive results without \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "58",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2020:UFS,
  author          = {Peilin Zhao and Dayong Wang and Pengcheng Wu and Steven
                     C. H. Hoi},
  title           = {A Unified Framework for Sparse Online Learning},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {59:1--59:20},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3361559},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3361559},
  abstract        = {The amount of data in our society has been exploding in
                     the era of big data. This article aims to address several
                     open challenges in big data stream classification. Many
                     existing studies in data mining literature follow the
                     batch learning setting, which \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {59},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Ceccarello:2020:GCB,
  author          = {Matteo Ceccarello and Andrea Pietracaprina and Geppino
                     Pucci},
  title           = {A General Coreset-Based Approach to Diversity Maximization
                     under Matroid Constraints},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {60:1--60:27},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3402448},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3402448},
  abstract        = {Diversity maximization is a fundamental problem in web
                     search and data mining. For a given dataset $S$ of $n$
                     elements, the problem requires to determine a subset of
                     $S$ containing $ k \ll n$ ``representatives'' which
                     maximize some diversity function expressed in \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {60},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Nguyen:2020:EEC,
  author          = {Hung Nguyen and Xuejian Wang and Leman Akoglu},
  title           = {End-to-End Continual Rare-Class Recognition with Emerging
                     Novel Subclasses},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {61:1--61:28},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3399660},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3399660},
  abstract        = {Given a labeled dataset that contains a rare (or
                     minority) class containing of-interest instances, as well
                     as a large class of instances that are not of interest,
                     how can we learn to recognize future of-interest
                     instances over a continuous stream? The \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {61},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2020:EMO,
  author          = {Tingting Wang and Lei Duan and Guozhu Dong and Zhifeng
                     Bao},
  title           = {Efficient Mining of Outlying Sequence Patterns for
                     Analyzing Outlierness of Sequence Data},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {62:1--62:26},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3399671},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3399671},
  abstract        = {Recently, a lot of research work has been proposed in
                     different domains to detect outliers and analyze the
                     outlierness of outliers for relational data. However,
                     while sequence data is ubiquitous in real life, analyzing
                     the outlierness for sequence data \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {62},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Rossi:2020:PSR,
  author          = {Ryan A. Rossi and Di Jin and Sungchul Kim and Nesreen K.
                     Ahmed and Danai Koutra and John Boaz Lee},
  title           = {On Proximity and Structural Role-based Embeddings in
                     Networks: Misconceptions, Techniques, and Applications},
  journal         = j-TKDD,
  volume          = {14},
  number          = {5},
  pages           = {63:1--63:37},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3397191},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3397191},
  abstract        = {Structural roles define sets of structurally similar
                     nodes that are more similar to nodes inside the set than
                     outside, whereas communities define sets of nodes with
                     more connections inside the set than outside. Roles based
                     on structural similarity and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {63},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Nikolakopoulos:2020:BIB,
  author          = {Athanasios N. Nikolakopoulos and George Karypis},
  title           = {Boosting Item-based Collaborative Filtering via Nearly
                     Uncoupled Random Walks},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {64:1--64:26},
  month           = oct,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3406241},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3406241},
  abstract        = {Item-based models are among the most popular
                     collaborative filtering approaches for building
                     recommender systems. Random walks can provide a powerful
                     tool for harvesting the rich network of interactions
                     captured within these models. They can exploit \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {64},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Xu:2020:NAB,
  author          = {Jiarong Xu and Yifan Luo and Jianrong Tao and Changjie Fan
                     and Zhou Zhao and Jiangang Lu},
  title           = {{NGUARD+}: an Attention-based Game Bot Detection Framework
                     via Player Behavior Sequences},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {65:1--65:24},
  month           = oct,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3399711},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3399711},
  abstract        = {Game bots are automated programs that assist cheating
                     users, leading to an imbalance in the game ecosystem and
                     the collapse of user interest. Online games provide
                     immersive gaming experience and attract many loyal fans.
                     However, game bots have \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {65},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Guo:2020:IMS,
  author          = {Jianxiong Guo and Weili Wu},
  title           = {Influence Maximization: Seeding Based on Community
                     Structure},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {66:1--66:22},
  month           = oct,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3399661},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3399661},
  abstract        = {Influence maximization problem attempts to find a small
                     subset of nodes in a social network that makes the
                     expected influence maximized, which has been researched
                     intensively before. Most of the existing literature focus
                     only on maximizing total \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {66},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Hu:2020:EUP,
  author          = {Renjun Hu and Yanchi Liu and Yanyan Li and Jingbo Zhou
                     and Shuai Ma and Hui Xiong},
  title           = {Exploiting User Preference and Mobile Peer Influence
                     for Human Mobility Annotation},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {67:1--67:18},
  articleno       = {67},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3406600},
  URL             = {https://dl.acm.org/doi/10.1145/3406600},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Human mobility annotation aims to assign mobility
                     records the corresponding visiting Point-of-Interests
                     (POIs). It is one of the most fundamental problems for
                     understanding human mobile behaviors. In literature,
                     many efforts have been devoted to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Pang:2020:HUO,
  author          = {Guansong Pang and Longbing Cao},
  title           = {Heterogeneous Univariate Outlier Ensembles in
                     Multidimensional Data},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {68:1--68:27},
  articleno       = {68},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3403934},
  URL             = {https://dl.acm.org/doi/10.1145/3403934},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {In outlier detection, recent major research has
                     shifted from developing univariate methods to
                     multivariate methods due to the rapid growth of
                     multidimensional data. However, one typical issue of
                     this paradigm shift is that many multidimensional data
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Zamzami:2020:PMF,
  author          = {Nuha Zamzami and Nizar Bouguila},
  title           = {Probabilistic Modeling for Frequency Vectors Using a
                     Flexible Shifted-Scaled {Dirichlet} Distribution
                     Prior},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {69:1--69:35},
  articleno       = {69},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3406242},
  URL             = {https://dl.acm.org/doi/10.1145/3406242},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Burstiness and overdispersion phenomena of count
                     vectors pose significant challenges in modeling such
                     data accurately. While the dependency assumption of the
                     multinomial distribution causes its failure to model
                     frequency vectors in several machine \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Paudel:2020:ACD,
  author          = {Ramesh Paudel and William Eberle},
  title           = {An Approach For Concept Drift Detection in a Graph
                     Stream Using Discriminative Subgraphs},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {70:1--70:25},
  articleno       = {70},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3406243},
  URL             = {https://dl.acm.org/doi/10.1145/3406243},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {The emergence of mining complex networks like social
                     media, sensor networks, and the world-wide-web has
                     attracted considerable research interest. In a
                     streaming scenario, the concept to be learned can
                     change over time. However, while there has been some
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Mohotti:2020:EOD,
  author          = {Wathsala Anupama Mohotti and Richi Nayak},
  title           = {Efficient Outlier Detection in Text Corpus Using Rare
                     Frequency and Ranking},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {71:1--71:30},
  articleno       = {71},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3399712},
  URL             = {https://dl.acm.org/doi/10.1145/3399712},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Outlier detection in text data collections has become
                     significant due to the need of finding anomalies in the
                     myriad of text data sources. High feature
                     dimensionality, together with the larger size of these
                     document collections, presents a need for \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Zhang:2020:TWS,
  author          = {Chen Zhang and Steven C. H. Hoi and Fugee Tsung},
  title           = {Time-Warped Sparse Non-negative Factorization for
                     Functional Data Analysis},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {72:1--72:23},
  articleno       = {72},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3408313},
  URL             = {https://dl.acm.org/doi/10.1145/3408313},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {This article proposes a novel time-warped sparse
                     non-negative factorization method for functional data
                     analysis. The proposed method on the one hand
                     guarantees the extracted basis functions and their
                     coefficients to be positive and interpretable, and on
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Matheny:2020:SSS,
  author          = {Michael Matheny and Dong Xie and Jeff M. Phillips},
  title           = {Scalable Spatial Scan Statistics for Trajectories},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {73:1--73:24},
  articleno       = {73},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3394046},
  URL             = {https://dl.acm.org/doi/10.1145/3394046},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {We define several new models for how to define
                     anomalous regions among enormous sets of trajectories.
                     These are based on spatial scan statistics, and
                     identify a geometric region which captures a subset of
                     trajectories which are significantly different
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Li:2020:BDR,
  author          = {Shuangyin Li and Yu Zhang and Rong Pan},
  title           = {Bi-Directional Recurrent Attentional Topic Model},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {74:1--74:30},
  articleno       = {74},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3412371},
  URL             = {https://dl.acm.org/doi/10.1145/3412371},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {In a document, the topic distribution of a sentence
                     depends on both the topics of its neighbored sentences
                     and its own content, and it is usually affected by the
                     topics of the neighbored sentences with different
                     weights. The neighbored sentences of a \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Guo:2020:RAL,
  author          = {Jipeng Guo and Yanfeng Sun and Junbin Gao and Yongli
                     Hu and Baocai Yin},
  title           = {Robust Adaptive Linear Discriminant Analysis with
                     Bidirectional Reconstruction Constraint},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {75:1--75:20},
  articleno       = {75},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3409478},
  URL             = {https://dl.acm.org/doi/10.1145/3409478},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Linear discriminant analysis (LDA) is a well-known
                     supervised method for dimensionality reduction in which
                     the global structure of data can be preserved. The
                     classical LDA is sensitive to the noises, and the
                     projection direction of LDA cannot preserve \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Savva:2020:LSD,
  author          = {Fotis Savva and Christos Anagnostopoulos and Peter
                     Triantafillou and Kostas Kolomvatsos},
  title           = {Large-scale Data Exploration Using Explanatory
                     Regression Functions},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {76:1--76:33},
  articleno       = {76},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3410448},
  URL             = {https://dl.acm.org/doi/10.1145/3410448},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Analysts wishing to explore multivariate data spaces,
                     typically issue queries involving selection operators,
                     i.e., range or equality predicates, which define data
                     subspaces of potential interest. Then, they use
                     aggregation functions, the results of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Ma:2020:RRT,
  author          = {Qian Ma and Yu Gu and Wang-Chien Lee and Ge Yu and
                     Hongbo Liu and Xindong Wu},
  title           = {{REMIAN}: Real-Time and Error-Tolerant Missing Value
                     Imputation},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {77:1--77:38},
  articleno       = {77},
  month           = oct,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3412364},
  URL             = {https://dl.acm.org/doi/10.1145/3412364},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Missing value (MV) imputation is a critical
                     preprocessing means for data mining. Nevertheless,
                     existing MV imputation methods are mostly designed for
                     batch processing, and thus are not applicable to
                     streaming data, especially those with poor quality. In
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Wang:2021:HPR,
  author          = {Hao Wang and Shuai Ding and Yeqing Li and Xiaojian Li
                     and Youtao Zhang},
  title           = {Hierarchical Physician Recommendation via
                     Diversity-enhanced Matrix Factorization},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {1:1--1:17},
  articleno       = {1},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418227},
  URL             = {https://dl.acm.org/doi/10.1145/3418227},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Recent studies have shown that there exhibits
                     significantly imbalanced medical resource allocation
                     across public hospitals. Patients, regardless of their
                     diseases, tend to choose hospitals and physicians with
                     a better reputation, which often overloads \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Galimberti:2021:SCD,
  author          = {Edoardo Galimberti and Martino Ciaperoni and Alain
                     Barrat and Francesco Bonchi and Ciro Cattuto and
                     Francesco Gullo},
  title           = {Span-core Decomposition for Temporal Networks:
                     Algorithms and Applications},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {2:1--2:44},
  articleno       = {2},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418226},
  URL             = {https://dl.acm.org/doi/10.1145/3418226},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {When analyzing temporal networks, a fundamental task
                     is the identification of dense structures (i.e., groups
                     of vertices that exhibit a large number of links),
                     together with their temporal span (i.e., the period of
                     time for which the high density holds). \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Huang:2021:DGM,
  author          = {Yu Huang and Josh Jia-Ching Ying and Philip S. Yu and
                     Vincent S. Tseng},
  title           = {Dynamic Graph Mining for Multi-weight
                     Multi-destination Route Planning with Deadlines
                     Constraints},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {3:1--3:32},
  articleno       = {3},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3412363},
  URL             = {https://dl.acm.org/doi/10.1145/3412363},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Route planning satisfied multiple requests is an
                     emerging branch in the route planning field and has
                     attracted significant attention from the research
                     community in recent years. The prevailing studies focus
                     only on seeking a route by minimizing a single
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Siers:2021:CIC,
  author          = {Michael J. Siers and Md Zahidul Islam},
  title           = {Class Imbalance and Cost-Sensitive Decision Trees: a
                     Unified Survey Based on a Core Similarity},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {4:1--4:31},
  articleno       = {4},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3415156},
  URL             = {https://dl.acm.org/doi/10.1145/3415156},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Class imbalance treatment methods and cost-sensitive
                     classification algorithms are typically treated as two
                     independent research areas. However, many of these
                     techniques have properties in common. After providing a
                     background to the two fields of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Huang:2021:MSN,
  author          = {Hong Huang and Yu Song and Fanghua Ye and Xing Xie and
                     Xuanhua Shi and Hai Jin},
  title           = {Multi-Stage Network Embedding for Exploring
                     Heterogeneous Edges},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {5:1--5:27},
  articleno       = {5},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3415157},
  URL             = {https://dl.acm.org/doi/10.1145/3415157},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {The relationships between objects in a network are
                     typically diverse and complex, leading to the
                     heterogeneous edges with different semantic
                     information. In this article, we focus on exploring the
                     heterogeneous edges for network representation
                     learning. \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Hu:2021:RTR,
  author          = {Yue Hu and Daniel B. Work},
  title           = {Robust Tensor Recovery with Fiber Outliers for Traffic
                     Events},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {6:1--6:27},
  articleno       = {6},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3417337},
  URL             = {https://dl.acm.org/doi/10.1145/3417337},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Event detection is gaining increasing attention in
                     smart cities research. Large-scale mobility data serves
                     as an important tool to uncover the dynamics of urban
                     transportation systems, and more often than not the
                     dataset is incomplete. In this article, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Zhu:2021:ARD,
  author          = {Xiaoyan Zhu and Yingbin Li and Jiayin Wang and Tian
                     Zheng and Jingwen Fu},
  title           = {Automatic Recommendation of a Distance Measure for
                     Clustering Algorithms},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {7:1--7:22},
  articleno       = {7},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418228},
  URL             = {https://dl.acm.org/doi/10.1145/3418228},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {With a large number of distance measures, the
                     appropriate choice for clustering a given data set with
                     a specified clustering algorithm becomes an important
                     problem. In this article, an automatic distance measure
                     recommendation method for clustering \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Bernardini:2021:CAS,
  author          = {Giulia Bernardini and Huiping Chen and Alessio Conte
                     and Roberto Grossi and Grigorios Loukides and Nadia
                     Pisanti and Solon P. Pissis and Giovanna Rosone and
                     Michelle Sweering},
  title           = {Combinatorial Algorithms for String Sanitization},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {8:1--8:34},
  articleno       = {8},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418683},
  URL             = {https://dl.acm.org/doi/10.1145/3418683},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {String data are often disseminated to support
                     applications such as location-based service provision
                     or DNA sequence analysis. This dissemination, however,
                     may expose sensitive patterns that model confidential
                     knowledge (e.g., trips to mental health \ldots{}).},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Rossi:2021:HG,
  author          = {Ryan A. Rossi and Nesreen K. Ahmed and Aldo Carranza
                     and David Arbour and Anup Rao and Sungchul Kim and
                     Eunyee Koh},
  title           = {Heterogeneous Graphlets},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {9:1--9:43},
  articleno       = {9},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418773},
  URL             = {https://dl.acm.org/doi/10.1145/3418773},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {In this article, we introduce a generalization of
                     graphlets to heterogeneous networks called typed
                     graphlets. Informally, typed graphlets are small typed
                     induced subgraphs. Typed graphlets generalize graphlets
                     to rich heterogeneous networks as they \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Ji:2021:ALS,
  author          = {Yugang Ji and Mingyang Yin and Hongxia Yang and
                     Jingren Zhou and Vincent W. Zheng and Chuan Shi and
                     Yuan Fang},
  title           = {Accelerating Large-Scale Heterogeneous Interaction
                     Graph Embedding Learning via Importance Sampling},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {10:1--10:23},
  articleno       = {10},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3418684},
  URL             = {https://dl.acm.org/doi/10.1145/3418684},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {In real-world problems, heterogeneous entities are
                     often related to each other through multiple
                     interactions, forming a Heterogeneous Interaction Graph
                     (HIG). While modeling HIGs to deal with fundamental
                     tasks, graph neural networks present an \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Xun:2021:MPI,
  author          = {Guangxu Xun and Kishlay Jha and Aidong Zhang},
  title           = {{MeSHProbeNet-P}: Improving Large-scale {MeSH}
                     Indexing with Personalizable {MeSH} Probes},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {11:1--11:14},
  articleno       = {11},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3421713},
  URL             = {https://dl.acm.org/doi/10.1145/3421713},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Indexing biomedical research articles with Medical
                     Subject Headings (MeSH) can greatly facilitate
                     biomedical research and information retrieval.
                     Currently MeSH indexing is performed by human experts.
                     To alleviate the time consumption and monetary cost
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Tu:2021:CCJ,
  author          = {Jinzheng Tu and Guoxian Yu and Jun Wang and Carlotta
                     Domeniconi and Maozu Guo and Xiangliang Zhang},
  title           = {{CrowdWT}: Crowdsourcing via Joint Modeling of Workers
                     and Tasks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {12:1--12:24},
  articleno       = {12},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3421712},
  URL             = {https://dl.acm.org/doi/10.1145/3421712},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Crowdsourcing is a relatively inexpensive and
                     efficient mechanism to collect annotations of data from
                     the open Internet. Crowdsourcing workers are paid for
                     the provided annotations, but the task requester
                     usually has a limited budget. It is desirable to
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Azevedo:2021:RNT,
  author          = {Ricardo {De Azevedo} and Gabriel Resende Machado and
                     Ronaldo Ribeiro Goldschmidt and Ricardo Choren},
  title           = {A Reduced Network Traffic Method for {IoT} Data
                     Clustering},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {13:1--13:23},
  articleno       = {13},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3423139},
  URL             = {https://dl.acm.org/doi/10.1145/3423139},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Internet of Things (IoT) systems usually involve
                     interconnected, low processing capacity, and low memory
                     sensor nodes (devices) that collect data in several
                     sorts of applications that interconnect people and
                     things. In this scenario, mining tasks, such \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Rossi:2021:KGE,
  author          = {Andrea Rossi and Denilson Barbosa and Donatella
                     Firmani and Antonio Matinata and Paolo Merialdo},
  title           = {Knowledge Graph Embedding for Link Prediction: a
                     Comparative Analysis},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {14:1--14:49},
  articleno       = {14},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3424672},
  URL             = {https://dl.acm.org/doi/10.1145/3424672},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Knowledge Graphs (KGs) have found many applications in
                     industrial and in academic settings, which in turn,
                     have motivated considerable research efforts towards
                     large-scale information extraction from a variety of
                     sources. Despite such efforts, it is \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Amornbunchornvej:2021:ILM,
  author          = {Chainarong Amornbunchornvej and Navaporn Surasvadi and
                     Anon Plangprasopchok and Suttipong Thajchayapong},
  title           = {Identifying Linear Models in Multi-Resolution
                     Population Data Using Minimum Description Length
                     Principle to Predict Household Income},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {15:1--15:30},
  articleno       = {15},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3424670},
  URL             = {https://dl.acm.org/doi/10.1145/3424670},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {One shirt size cannot fit everybody, while we cannot
                     make a unique shirt that fits perfectly for everyone
                     because of resource limitations. This analogy is true
                     for policy making as well. Policy makers cannot make a
                     single policy to solve all problems \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Feng:2021:RSP,
  author          = {Yi Feng and Chuanyi Li and Jidong Ge and Bin Luo and
                     Vincent Ng},
  title           = {Recommending Statutes: a Portable Method Based on
                     Neural Networks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {16:1--16:22},
  articleno       = {16},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3424671},
  URL             = {https://dl.acm.org/doi/10.1145/3424671},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Legal judgment prediction, which aims at predicting
                     judgment results such as penalty, charges, and statutes
                     for cases, has attracted much attention recently. In
                     this article, we focus on building a recommender system
                     to predict the associated statutes \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Wang:2021:HNH,
  author          = {Yashen Wang and Huanhuan Zhang},
  title           = {{HARP}: a Novel Hierarchical Attention Model for
                     Relation Prediction},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {17:1--17:22},
  articleno       = {17},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3424673},
  URL             = {https://dl.acm.org/doi/10.1145/3424673},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Recent years have witnessed great advancement of
                     representation learning (RL)-based models for the
                     knowledge graph relation prediction task. However, they
                     generally rely on structure information embedded in the
                     encyclopedic knowledge graph, while the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Zhou:2021:HOS,
  author          = {Dawei Zhou and Si Zhang and Mehmet Yigit Yildirim and
                     Scott Alcorn and Hanghang Tong and Hasan Davulcu and
                     Jingrui He},
  title           = {High-Order Structure Exploration on Massive Graphs: a
                     Local Graph Clustering Perspective},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {18:1--18:26},
  articleno       = {18},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3425637},
  URL             = {https://dl.acm.org/doi/10.1145/3425637},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Modeling and exploring high-order connectivity
                     patterns, also called network motifs, are essential for
                     understanding the fundamental structures that control
                     and mediate the behavior of many complex systems. For
                     example, in social networks, triangles \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Li:2021:EFM,
  author          = {Chongshou Li and Brenda Cheang and Zhixing Luo and
                     Andrew Lim},
  title           = {An Exponential Factorization Machine with Percentage
                     Error Minimization to Retail Sales Forecasting},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {19:1--19:32},
  articleno       = {19},
  month           = apr,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3426238},
  URL             = {https://dl.acm.org/doi/10.1145/3426238},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  CODEN           = {????},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {This article proposes a new approach to sales
                     forecasting for new products (stock-keeping units
                     [SKUs]) with long lead time but short product life
                     cycle. These SKUs are usually sold for one season only,
                     without any replenishments. An exponential \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}
@Article{Djenouri:2021:TOD,
  author =       "Youcef Djenouri and Djamel Djenouri and Jerry Chun-Wei
                 Lin",
  title =        "Trajectory Outlier Detection: New Problems and
                 Solutions for Smart Cities",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "20:1--20:28",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3425867",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3425867",
  abstract =     "This article introduces two new problems related to
                 trajectory outlier detection: (1) group trajectory
                 outlier (GTO) detection and (2) deviation point
                 detection for both individual and group of trajectory
                 outliers. Five algorithms are proposed for the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:SSR,
  author =       "Kafeng Wang and Haoyi Xiong and Jiang Bian and
                 Zhanxing Zhu and Qian Gao and Zhishan Guo and
                 Cheng-Zhong Xu and Jun Huan and Dejing Dou",
  title =        "Sampling Sparse Representations with Randomized
                 Measurement {Langevin} Dynamics",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "21:1--21:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3427585",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3427585",
  abstract =     "Stochastic Gradient Langevin Dynamics (SGLD) have been
                 widely used for Bayesian sampling from certain
                 probability distributions, incorporating derivatives of
                 the log-posterior. With the derivative evaluation of
                 the log-posterior distribution, SGLD \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Belohlavek:2021:ATP,
  author =       "Radim Belohlavek and Martin Trnecka",
  title =        "The {8M} Algorithm from Today's Perspective",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "22:1--22:22",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3428078",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3428078",
  abstract =     "We provide a detailed analysis and a first complete
                 description of 8M --- an old but virtually unknown
                 algorithm for Boolean matrix factorization. Even though
                 the algorithm uses a rather limited insight into the
                 factorization problem from today's \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2021:CIN,
  author =       "En Xu and Zhiwen Yu and Bin Guo and Helei Cui",
  title =        "Core Interest Network for Click-Through Rate
                 Prediction",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "23:1--23:16",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3428079",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3428079",
  abstract =     "In modern online advertising systems, the
                 click-through rate (CTR) is an important index to
                 measure the popularity of an item. It refers to the
                 ratio of users who click on a specific advertisement to
                 the number of total users who view it. Predicting the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ghosh:2021:CBE,
  author =       "Aindrila Ghosh and Mona Nashaat and James Miller and
                 Shaikh Quader",
  title =        "Context-Based Evaluation of Dimensionality Reduction
                 Algorithms --- Experiments and Statistical Significance
                 Analysis",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "24:1--24:40",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3428077",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3428077",
  abstract =     "Dimensionality reduction is a commonly used technique
                 in data analytics. Reducing the dimensionality of
                 datasets helps not only with managing their analytical
                 complexity but also with removing redundancy. Over the
                 years, several such algorithms have \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:HNA,
  author =       "Shikang Liu and Fatemeh Vahedian and David Hachen and
                 Omar Lizardo and Christian Poellabauer and Aaron
                 Striegel and Tijana Milenkovi{\'c}",
  title =        "Heterogeneous Network Approach to Predict Individuals'
                 Mental Health",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "25:1--25:26",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429446",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429446",
  abstract =     "Depression and anxiety are critical public health
                 issues affecting millions of people around the world.
                 To identify individuals who are vulnerable to
                 depression and anxiety, predictive models have been
                 built that typically utilize data from one source.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "25",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhou:2021:UMF,
  author =       "Zhengze Zhou and Giles Hooker",
  title =        "Unbiased Measurement of Feature Importance in
                 Tree-Based Methods",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "26:1--26:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429445",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429445",
  abstract =     "We propose a modification that corrects for
                 split-improvement variable importance measures in
                 Random Forests and other tree-based methods. These
                 methods have been shown to be biased towards increasing
                 the importance of features with more potential
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "26",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Almeida:2021:MCB,
  author =       "Matthew Almeida and Yong Zhuang and Wei Ding and Scott
                 E. Crouter and Ping Chen",
  title =        "Mitigating Class-Boundary Label Uncertainty to Reduce
                 Both Model Bias and Variance",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "27:1--27:18",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429447",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429447",
  abstract =     "The study of model bias and variance with respect to
                 decision boundaries is critically important in
                 supervised learning and artificial intelligence. There
                 is generally a tradeoff between the two, as fine-tuning
                 of the decision boundary of a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Munoz:2021:ISA,
  author =       "Mario Andr{\'e}s Mu{\~n}oz and Tao Yan and Matheus R.
                 Leal and Kate Smith-Miles and Ana Carolina Lorena and
                 Gisele L. Pappa and R{\^o}mulo Madureira Rodrigues",
  title =        "An Instance Space Analysis of Regression Problems",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "28:1--28:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3436893",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3436893",
  abstract =     "The quest for greater insights into algorithm
                 strengths and weaknesses, as revealed when studying
                 algorithm performance on large collections of test
                 problems, is supported by interactive visual analytics
                 tools. A recent advance is Instance Space \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Coscia:2021:NCS,
  author =       "Michele Coscia",
  title =        "Noise Corrected Sampling of Online Social Networks",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "29:1--29:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434749",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434749",
  abstract =     "In this article, we propose a new method to perform
                 topological network sampling. Topological network
                 sampling is a process for extracting a subset of nodes
                 and edges from a network, such that analyses on the
                 sample provide results and conclusions \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Steinbuss:2021:GAO,
  author =       "Georg Steinbuss and Klemens B{\"o}hm",
  title =        "Generating Artificial Outliers in the Absence of
                 Genuine Ones --- A Survey",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "30:1--30:37",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3447822",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447822",
  abstract =     "By definition, outliers are rarely observed in
                 reality, making them difficult to detect or analyze.
                 Artificial outliers approximate such genuine outliers
                 and can, for instance, help with the detection of
                 genuine outliers or with benchmarking
                 outlier-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "30",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhu:2021:SCS,
  author =       "Yi Zhu and Lei Li and Xindong Wu",
  title =        "Stacked Convolutional Sparse Auto-Encoders for
                 Representation Learning",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "31:1--31:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434767",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434767",
  abstract =     "Deep learning seeks to achieve excellent performance
                 for representation learning in image datasets. However,
                 supervised deep learning models such as convolutional
                 neural networks require a large number of labeled image
                 data, which is intractable in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2021:JTD,
  author =       "Bin Sun and Dehui Kong and Shaofan Wang and Lichun
                 Wang and Baocai Yin",
  title =        "Joint Transferable Dictionary Learning and View
                 Adaptation for Multi-view Human Action Recognition",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "32:1--32:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434746",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434746",
  abstract =     "Multi-view human action recognition remains a
                 challenging problem due to large view changes. In this
                 article, we propose a transfer learning-based framework
                 called transferable dictionary learning and view
                 adaptation (TDVA) model for multi-view human \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Garciarena:2021:TAC,
  author =       "Unai Garciarena and Alexander Mendiburu and Roberto
                 Santana",
  title =        "Towards Automatic Construction of Multi-Network Models
                 for Heterogeneous Multi-Task Learning",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "33:1--33:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434748",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434748",
  abstract =     "Multi-task learning, as it is understood nowadays,
                 consists of using one single model to carry out several
                 similar tasks. From classifying hand-written characters
                 of different alphabets to figuring out how to play
                 several Atari games using reinforcement \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ying:2021:IKB,
  author =       "Shi Ying and Bingming Wang and Lu Wang and Qingshan Li
                 and Yishi Zhao and Jianga Shang and Hao Huang and Guoli
                 Cheng and Zhe Yang and Jiangyi Geng",
  title =        "An Improved {KNN}-Based Efficient Log Anomaly
                 Detection Method with Automatically Labeled Samples",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "34:1--34:22",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441448",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441448",
  abstract =     "Logs that record system abnormal states (anomaly logs)
                 can be regarded as outliers, and the k-Nearest Neighbor
                 (kNN) algorithm has relatively high accuracy in outlier
                 detection methods. Therefore, we use the kNN algorithm
                 to detect anomalies in the log \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Salve:2021:PIU,
  author =       "Andrea {De Salve} and Paolo Mori and Barbara Guidi and
                 Laura Ricci and Roberto {Di Pietro}",
  title =        "Predicting Influential Users in Online Social Network
                 Groups",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "35:1--35:50",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441447",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441447",
  abstract =     "The widespread adoption of Online Social Networks
                 (OSNs), the ever-increasing amount of information
                 produced by their users, and the corresponding capacity
                 to influence markets, politics, and society, have led
                 both industrial and academic researchers to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Xie:2021:UPC,
  author =       "Hong Xie and Mingze Zhong and Yongkun Li and John C.
                 S. Lui",
  title =        "Understanding Persuasion Cascades in Online Product
                 Rating Systems: Modeling, Analysis, and Inference",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "36:1--36:29",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440887",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440887",
  abstract =     "Online product rating systems have become an
                 indispensable component for numerous web services such
                 as Amazon, eBay, Google Play Store, and TripAdvisor.
                 One functionality of such systems is to uncover the
                 product quality via product ratings (or reviews)
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2021:POP,
  author =       "Zheng Zhang and Xiaofeng Zhu and Guangming Lu and
                 Yudong Zhang",
  title =        "Probability Ordinal-Preserving Semantic Hashing for
                 Large-Scale Image Retrieval",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "37:1--37:22",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442204",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442204",
  abstract =     "Semantic hashing enables computation and
                 memory-efficient image retrieval through learning
                 similarity-preserving binary representations. Most
                 existing hashing methods mainly focus on preserving the
                 piecewise class information or pairwise correlations of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Shin:2021:CFA,
  author =       "Kijung Shin and Euiwoong Lee and Jinoh Oh and Mohammad
                 Hammoud and Christos Faloutsos",
  title =        "{CoCoS}: Fast and Accurate Distributed Triangle
                 Counting in Graph Streams",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "38:1--38:30",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441487",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441487",
  abstract =     "Given a graph stream, how can we estimate the number
                 of triangles in it using multiple machines with limited
                 storage? Specifically, how should edges be processed
                 and sampled across the machines for rapid and accurate
                 estimation? The count of triangles \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ata:2021:MVC,
  author =       "Sezin Kircali Ata and Yuan Fang and Min Wu and Jiaqi
                 Shi and Chee Keong Kwoh and Xiaoli Li",
  title =        "Multi-View Collaborative Network Embedding",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "39:1--39:18",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441450",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441450",
  abstract =     "Real-world networks often exist with multiple views,
                 where each view describes one type of interaction among
                 a common set of nodes. For example, on a video-sharing
                 network, while two user nodes are linked, if they have
                 common favorite videos in one view, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:SVR,
  author =       "Wei Wang and Feng Xia and Jian Wu and Zhiguo Gong and
                 Hanghang Tong and Brian D. Davison",
  title =        "{Scholar2vec}: Vector Representation of Scholars for
                 Lifetime Collaborator Prediction",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "40:1--40:19",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442199",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442199",
  abstract =     "While scientific collaboration is critical for a
                 scholar, some collaborators can be more significant
                 than others, e.g., lifetime collaborators. It has been
                 shown that lifetime collaborators are more influential
                 on a scholar's academic performance. However,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Bai:2021:TTG,
  author =       "Luyi Bai and Xiangnan Ma and Mingcheng Zhang and
                 Wenting Yu",
  title =        "{TPmod}: a Tendency-Guided Prediction Model for
                 Temporal Knowledge Graph Completion",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "41:1--41:17",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3443687",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3443687",
  abstract =     "Temporal knowledge graphs (TKGs) have become useful
                 resources for numerous Artificial Intelligence
                 applications, but they are far from completeness.
                 Inferring missing events in temporal knowledge graphs
                 is a fundamental and challenging task. However, most
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:RCE,
  author =       "Jingjing Wang and Wenjun Jiang and Kenli Li and Keqin
                 Li",
  title =        "Reducing Cumulative Errors of Incremental {CP}
                 Decomposition in Dynamic Online Social Networks",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "42:1--42:33",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441645",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441645",
  abstract =     "CANDECOMP/PARAFAC (CP) decomposition is widely used in
                 various online social network (OSN) applications.
                 However, it is inefficient when dealing with massive
                 and incremental data. Some incremental CP decomposition
                 (ICP) methods have been proposed to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2021:PGA,
  author =       "Guanhao Wu and Xiaofeng Gao and Ge Yan and Guihai
                 Chen",
  title =        "Parallel Greedy Algorithm to Multiple Influence
                 Maximization in Social Network",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "43:1--43:21",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442341",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442341",
  abstract =     "Influence Maximization (IM) problem is to select
                 influential users to maximize the influence spread,
                 which plays an important role in many real-world
                 applications such as product recommendation, epidemic
                 control, and network monitoring. Nowadays multiple
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2021:EDR,
  author =       "Lei Yang and Xi Yu and Jiannong Cao and Xuxun Liu and
                 Pan Zhou",
  title =        "Exploring Deep Reinforcement Learning for Task
                 Dispatching in Autonomous On-Demand Services",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "44:1--44:23",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442343",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442343",
  abstract =     "Autonomous on-demand services, such as GOGOX (formerly
                 GoGoVan) in Hong Kong, provide a platform for users to
                 request services and for suppliers to meet such
                 demands. In such a platform, the suppliers have
                 autonomy to accept or reject the demands to be
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Cheng:2021:GLG,
  author =       "Lin Cheng and Yuliang Shi and Kun Zhang and Xinjun
                 Wang and Zhiyong Chen",
  title =        "{GGATB-LSTM}: Grouping and Global Attention-based
                 Time-aware Bidirectional {LSTM} Medical Treatment
                 Behavior Prediction",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "45:1--45:16",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441454",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441454",
  abstract =     "In China, with the continuous development of national
                 health insurance policies, more and more people have
                 joined the health insurance. How to accurately predict
                 patients' future medical treatment behavior becomes a
                 hotspot issue. The biggest challenge in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:SRS,
  author =       "Xueyan Liu and Bo Yang and Hechang Chen and Katarzyna
                 Musial and Hongxu Chen and Yang Li and Wanli Zuo",
  title =        "A Scalable Redefined Stochastic Blockmodel",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "46:1--46:28",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442589",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442589",
  abstract =     "Stochastic blockmodel (SBM) is a widely used
                 statistical network representation model, with good
                 interpretability, expressiveness, generalization, and
                 flexibility, which has become prevalent and important
                 in the field of network science over the last
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:KTW,
  author =       "Yan Liu and Bin Guo and Daqing Zhang and Djamal
                 Zeghlache and Jingmin Chen and Ke Hu and Sizhe Zhang
                 and Dan Zhou and Zhiwen Yu",
  title =        "Knowledge Transfer with Weighted Adversarial Network
                 for Cold-Start Store Site Recommendation",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "47:1--47:27",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442203",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442203",
  abstract =     "Store site recommendation aims to predict the value of
                 the store at candidate locations and then recommend the
                 optimal location to the company for placing a new
                 brick-and-mortar store. Most existing studies focus on
                 learning machine learning or deep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Nasir:2021:TAM,
  author =       "Muhammad Anis Uddin Nasir and Cigdem Aslay and
                 Gianmarco {De Francisci Morales} and Matteo Riondato",
  title =        "{TipTap}: Approximate Mining of Frequent $k$-Subgraph
                 Patterns in Evolving Graphs",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "48:1--48:35",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442590",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442590",
  abstract =     "``Perhaps he could dance first and think afterwards,
                 if it isn't too much to ask him.'' S. Beckett, Waiting
                 for Godot. Given a labeled graph, the collection of
                 $k$-vertex induced connected subgraph patterns that
                 appear in the graph more frequently than a
                 user-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "48",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2021:PIR,
  author =       "Chen Lin and Zhichao Ouyang and Xiaoli Wang and Hui Li
                 and Zhenhua Huang",
  title =        "Preserve Integrity in Realtime Event Summarization",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "49:1--49:29",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442344",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442344",
  abstract =     "Online text streams such as Twitter are the major
                 information source for users when they are looking for
                 ongoing events. Realtime event summarization aims to
                 generate and update coherent and concise summaries to
                 describe the state of a given event. Due to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "49",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2021:DLB,
author = "Jie Jiang and Qiuqiang Kong and Mark D. Plumbley and
Nigel Gilbert and Mark Hoogendoorn and Diederik M.
Roijers",
title = "Deep Learning-Based Energy Disaggregation and On\slash
Off Detection of Household Appliances",
journal = j-TKDD,
volume = "15",
number = "3",
pages = "50:1--50:21",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441300",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 5 08:45:16 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441300",
abstract = "Energy disaggregation, a.k.a. Non-Intrusive Load
Monitoring, aims to separate the energy consumption of
individual appliances from the readings of a mains
power meter measuring the total energy consumption of,
e.g., a whole house. Energy consumption of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "50",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2021:EHQ,
author = "Haida Zhang and Zengfeng Huang and Xuemin Lin and Zhe
Lin and Wenjie Zhang and Ying Zhang",
title = "Efficient and High-Quality Seeded Graph Matching:
Employing Higher-order Structural Information",
journal = j-TKDD,
volume = "15",
number = "3",
pages = "51:1--51:31",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442340",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 5 08:45:16 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3442340",
abstract = "Driven by many real applications, we study the problem
of seeded graph matching. Given two graphs
$G_1 = (V_1, E_1)$ and $G_2 = (V_2, E_2)$, and a small
set $S$ of pre-matched node pairs $[u, v]$ where
$u \in V_1$ and $v \in V_2$, the problem is to identify
a matching between $V_1$ and $V_2$ growing from $S$,
such that each pair in the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "51",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Barlaug:2021:NNE,
author = "Nils Barlaug and Jon Atle Gulla",
title = "Neural Networks for Entity Matching: a Survey",
journal = j-TKDD,
volume = "15",
number = "3",
pages = "52:1--52:37",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442200",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 5 08:45:16 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3442200",
abstract = "Entity matching is the problem of identifying which
records refer to the same real-world entity. It has
been actively researched for decades, and a variety of
different approaches have been developed. Even today,
it remains a challenging problem, and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "52",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2021:FCM,
author = "Chen Chen and Ruiyue Peng and Lei Ying and Hanghang
Tong",
title = "Fast Connectivity Minimization on Large-Scale
Networks",
journal = j-TKDD,
volume = "15",
number = "3",
pages = "53:1--53:25",
month = may,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442342",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 5 08:45:16 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3442342",
abstract = "The connectivity of networks has been widely studied
in many high-impact applications, ranging from
immunization, critical infrastructure analysis, social
network mining, to bioinformatic system studies.
Regardless of the end application domains, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "53",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:LBC,
author = "Yunzhe Wang and George Baciu and Chenhui Li",
title = "A Layout-Based Classification Method for Visualizing
Time-Varying Graphs",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "54:1--54:24",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441301",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441301",
abstract = "Connectivity analysis between the components of large
evolving systems can reveal significant patterns of
interaction. The systems can be simulated by
topological graph structures. However, such analysis
becomes challenging on large and complex graphs.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "54",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ouyang:2021:MAC,
author = "Yi Ouyang and Bin Guo and Xing Tang and Xiuqiang He
and Jian Xiong and Zhiwen Yu",
title = "Mobile App Cross-Domain Recommendation with
Multi-Graph Neural Network",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "55:1--55:21",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442201",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3442201",
abstract = "With the rapid development of mobile app ecosystem,
mobile apps have grown greatly popular. The explosive
growth of apps makes it difficult for users to find
apps that meet their interests. Therefore, it is
necessary to recommend user with a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "55",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Dornaika:2021:EET,
author = "F. Dornaika",
title = "Elastic Embedding through Graph Convolution-based
Regression for Semi-supervised Classification",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "56:1--56:11",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441456",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441456",
abstract = "This article introduces a scheme for semi-supervised
learning by estimating a flexible non-linear data
representation that exploits Spectral Graph
Convolutions structure. Structured data are exploited
in order to determine non-linear and linear models.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "56",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2021:LTE,
author = "Yanni Li and Bing Liu and Yongbo Yu and Hui Li and
Jiacan Sun and Jiangtao Cui",
title = "{3E-LDA}: Three Enhancements to Linear Discriminant
Analysis",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "57:1--57:20",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442347",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3442347",
abstract = "Linear discriminant analysis (LDA) is one of the
important techniques for dimensionality reduction,
machine learning, and pattern recognition. However, in
many applications, applying the classical LDA often
faces the following problems: (1) sensitivity
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "57",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zang:2021:JMS,
author = "Tianzi Zang and Yanmin Zhu and Yanan Xu and Jiadi Yu",
title = "Jointly Modeling Spatio-Temporal Dependencies and
Daily Flow Correlations for Crowd Flow Prediction",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "58:1--58:20",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439346",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3439346",
abstract = "Crowd flow prediction is a vital problem for an
intelligent transportation system construction in a
smart city. It plays a crucial role in traffic
management and behavioral analysis, thus it has raised
great attention from many researchers. However,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "58",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ahmed:2021:OST,
author = "Nesreen K. Ahmed and Nick Duffield and Ryan A. Rossi",
title = "Online Sampling of Temporal Networks",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "59:1--59:27",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442202",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3442202",
abstract = "Temporal networks representing a stream of timestamped
edges are seemingly ubiquitous in the real world.
However, the massive size and continuous nature of
these networks make them fundamentally challenging to
analyze and leverage for descriptive and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "59",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2021:SIF,
author = "Huan Zhao and Quanming Yao and Yangqiu Song and James
T. Kwok and Dik Lun Lee",
title = "Side Information Fusion for Recommender Systems over
Heterogeneous Information Network",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "60:1--60:32",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441446",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441446",
abstract = "Collaborative filtering (CF) has been one of the most
important and popular recommendation methods, which
aims at predicting users' preferences (ratings) based
on their past behaviors. Recently, various types of
side information beyond the explicit \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "60",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2021:SEB,
author = "Daokun Zhang and Jie Yin and Xingquan Zhu and Chengqi
Zhang",
title = "Search Efficient Binary Network Embedding",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "61:1--61:27",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3436892",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3436892",
abstract = "Traditional network embedding primarily focuses on
learning a continuous vector representation for each
node, preserving network structure and/or node content
information, such that off-the-shelf machine learning
algorithms can be easily applied to the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "61",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Song:2021:NEH,
author = "Guojie Song and Yun Wang and Lun Du and Yi Li and
Junshan Wang",
title = "Network Embedding on Hierarchical Community Structure
Network",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "62:1--62:23",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434747",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3434747",
abstract = "Network embedding is a method of learning a
low-dimensional vector representation of network
vertices under the condition of preserving different
types of network properties. Previous studies mainly
focus on preserving structural information of vertices
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "62",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2021:UVC,
author = "Kui Yu and Lin Liu and Jiuyong Li",
title = "A Unified View of Causal and Non-causal Feature
Selection",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "63:1--63:46",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3436891",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3436891",
abstract = "In this article, we aim to develop a unified view of
causal and non-causal feature selection methods. The
unified view will fill in the gap in the research of
the relation between the two types of methods. Based on
the Bayesian network framework and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "63",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yin:2021:RIR,
author = "Shuai Yin and Yanfeng Sun and Junbin Gao and Yongli Hu
and Boyue Wang and Baocai Yin",
title = "Robust Image Representation via Low Rank Locality
Preserving Projection",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "64:1--64:22",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434768",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3434768",
abstract = "Locality preserving projection (LPP) is a
dimensionality reduction algorithm preserving the
neighborhood graph structure of data. However, the
conventional LPP is sensitive to outliers existing in
data. This article proposes a novel low-rank LPP model
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "64",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Steinbuss:2021:BUO,
author = "Georg Steinbuss and Klemens B{\"o}hm",
title = "Benchmarking Unsupervised Outlier Detection with
Realistic Synthetic Data",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "65:1--65:20",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441453",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441453",
abstract = "Benchmarking unsupervised outlier detection is
difficult. Outliers are rare, and existing benchmark
data contains outliers with various and unknown
characteristics. Fully synthetic data usually consists
of outliers and regular instances with clear \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "65",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2021:SEK,
author = "Mingkai Lin and Wenzhong Li and Lynda J. Song and
Cam-Tu Nguyen and Xiaoliang Wang and Sanglu Lu",
title = "{SAKE}: Estimating {Katz} Centrality Based on Sampling
for Large-Scale Social Networks",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "66:1--66:21",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441646",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441646",
abstract = "Katz centrality is a fundamental concept to measure
the influence of a vertex in a social network. However,
existing approaches to calculating Katz centrality in a
large-scale network are unpractical and computationally
expensive. In this article, we \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "66",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Amornbunchornvej:2021:VLG,
author = "Chainarong Amornbunchornvej and Elena Zheleva and
Tanya Berger-Wolf",
title = "Variable-lag {Granger} Causality and Transfer Entropy
for Time Series Analysis",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "67:1--67:30",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441452",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441452",
abstract = "Granger causality is a fundamental technique for
causal inference in time series data, commonly used in
the social and biological sciences. Typical
operationalizations of Granger causality make a strong
assumption that every time point of the effect
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "67",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xia:2021:ETD,
author = "Peike Xia and Wenjun Jiang and Jie Wu and Surong Xiao
and Guojun Wang",
title = "Exploiting Temporal Dynamics in Product Reviews for
Dynamic Sentiment Prediction at the Aspect Level",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "68:1--68:29",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441451",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441451",
abstract = "Online reviews and ratings play an important role in
shaping the purchase decisions of customers in
e-commerce. Many researches have been done to make
proper recommendations for users, by exploiting
reviews, ratings, user profiles, or behaviors.
However, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "68",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Kumar:2021:AGN,
author = "Suhansanu Kumar and Hari Sundaram",
title = "Attribute-Guided Network Sampling Mechanisms",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "69:1--69:24",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441445",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441445",
abstract = "This article introduces a novel task-independent
sampler for attributed networks. The problem is
important because while data mining tasks on network
content are common, sampling on internet-scale networks
is costly. Link-trace samplers such as Snowball
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "69",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ghasemi:2021:UEE,
author = "Negin Ghasemi and Ramin Fatourechi and Saeedeh
Momtazi",
title = "User Embedding for Expert Finding in Community
Question Answering",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "70:1--70:16",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441302",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441302",
abstract = "The number of users who have the appropriate knowledge
to answer asked questions in community question
answering is lower than those who ask questions.
Therefore, finding expert users who can answer the
questions is very crucial and useful. In this
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "70",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yan:2021:SAB,
author = "Ruidong Yan and Yi Li and Deying Li and Yongcai Wang
and Yuqing Zhu and Weili Wu",
title = "A Stochastic Algorithm Based on Reverse Sampling
Technique to Fight Against the Cyberbullying",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "71:1--71:22",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441455",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441455",
abstract = "Cyberbullying has caused serious consequences
especially for social network users in recent years.
However, the challenge is how to fight against the
cyberbullying effectively from the algorithmic
perspective. In this article, we study the fighting
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "71",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2021:ANE,
author = "Juan-Hui Li and Ling Huang and Chang-Dong Wang and
Dong Huang and Jian-Huang Lai and Pei Chen",
title = "Attributed Network Embedding with Micro-Meso
Structure",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "72:1--72:26",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441486",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441486",
abstract = "Recently, network embedding has received a large
amount of attention in network analysis. Although some
network embedding methods have been developed from
different perspectives, on one hand, most of the
existing methods only focus on leveraging the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "72",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2021:CHI,
author = "Benhui Zhang and Maoguo Gong and Jianbin Huang and
Xiaoke Ma",
title = "Clustering Heterogeneous Information Network by Joint
Graph Embedding and Nonnegative Matrix Factorization",
journal = j-TKDD,
volume = "15",
number = "4",
pages = "73:1--73:25",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441449",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 19 06:16:23 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441449",
abstract = "Many complex systems derived from nature and society
consist of multiple types of entities and heterogeneous
interactions, which can be effectively modeled as
heterogeneous information network (HIN). Structural
analysis of heterogeneous networks is of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "73",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yao:2021:SCI,
author = "Liuyi Yao and Zhixuan Chu and Sheng Li and Yaliang Li
and Jing Gao and Aidong Zhang",
title = "A Survey on Causal Inference",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "74:1--74:46",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3444944",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3444944",
abstract = "Causal inference is a critical research topic across
many domains, such as statistics, computer science,
education, public policy, and economics, for decades.
Nowadays, estimating causal effect from observational
data has become an appealing research \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "74",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jurdi:2021:CNN,
author = "Wissam {Al Jurdi} and Jacques {Bou Abdo} and Jacques
Demerjian and Abdallah Makhoul",
title = "Critique on Natural Noise in Recommender Systems",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "75:1--75:30",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447780",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447780",
abstract = "Recommender systems have been upgraded, tested, and
applied in many, often incomparable ways. In attempts
to diligently understand user behavior in certain
environments, those systems have been frequently
utilized in domains like e-commerce, e-learning,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "75",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Duong:2021:DGF,
author = "Quang-huy Duong and Heri Ramampiaro and Kjetil
N{\o}rv{\aa}g and Thu-lan Dam",
title = "Density Guarantee on Finding Multiple Subgraphs and
Subtensors",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "76:1--76:32",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446668",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3446668",
abstract = "Dense subregion (subgraph \& subtensor) detection is a
well-studied area, with a wide range of applications,
and numerous efficient approaches and algorithms have
been proposed. Approximation approaches are commonly
used for detecting dense subregions \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "76",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Burkhardt:2021:OAB,
author = "Paul Burkhardt",
title = "Optimal Algebraic Breadth-First Search for Sparse
Graphs",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "77:1--77:19",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446216",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3446216",
abstract = "There has been a rise in the popularity of algebraic
methods for graph algorithms given the development of
the GraphBLAS library and other sparse matrix methods.
An exemplar for these approaches is Breadth-First
Search (BFS). The algebraic BFS algorithm \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "77",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Maurya:2021:GNN,
author = "Sunil Kumar Maurya and Xin Liu and Tsuyoshi Murata",
title = "Graph Neural Networks for Fast Node Ranking
Approximation",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "78:1--78:32",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446217",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3446217",
abstract = "Graphs arise naturally in numerous situations,
including social graphs, transportation graphs, web
graphs, protein graphs, etc. One of the important
problems in these settings is to identify which nodes
are important in the graph and how they affect the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "78",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Stefani:2021:TSE,
author = "Lorenzo {De Stefani} and Erisa Terolli and Eli Upfal",
title = "Tiered Sampling: an Efficient Method for Counting
Sparse Motifs in Massive Graph Streams",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "79:1--79:52",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441299",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441299",
abstract = "We introduce Tiered Sampling, a novel technique for
estimating the count of sparse motifs in massive graphs
whose edges are observed in a stream. Our technique
requires only a single pass on the data and uses a
memory of fixed size $M$, which can be \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "79",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bauer:2021:ICL,
author = "Josef Bauer and Dietmar Jannach",
title = "Improved Customer Lifetime Value Prediction With
Sequence-To-Sequence Learning and Feature-Based
Models",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "80:1--80:37",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441444",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3441444",
abstract = "The prediction of the Customer Lifetime Value (CLV) is
an important asset for tool-supported marketing by
customer relationship managers. Since standard methods
based on purchase recency, frequency, and past profit
and revenue statistics often have \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "80",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sanei-Mehri:2021:MLM,
author = "Seyed-Vahid Sanei-Mehri and Apurba Das and Hooman
Hashemi and Srikanta Tirthapura",
title = "Mining Largest Maximal Quasi-Cliques",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "81:1--81:21",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446637",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3446637",
abstract = "Quasi-cliques are dense incomplete subgraphs of a
graph that generalize the notion of cliques.
Enumerating quasi-cliques from a graph is a robust way
to detect densely connected structures with
applications in bioinformatics and social network
analysis. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "81",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gan:2021:UMA,
author = "Wensheng Gan and Jerry Chun-Wei Lin and Jiexiong Zhang
and Hongzhi Yin and Philippe Fournier-Viger and
Han-Chieh Chao and Philip S. Yu",
title = "Utility Mining Across Multi-Dimensional Sequences",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "82:1--82:24",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446938",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3446938",
abstract = "Knowledge extraction from database is the fundamental
task in database and data mining community, which has
been applied to a wide range of real-world applications
and situations. Different from the support-based mining
models, the utility-oriented \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "82",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hao:2021:DEI,
author = "Shaoyang Hao and Bin Guo and Hao Wang and Yunji Liang
and Lina Yao and Qianru Wang and Zhiwen Yu",
title = "{DeepDepict}: Enabling Information Rich, Personalized
Product Description Generation With the Deep Multiple
Pointer Generator Network",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "83:1--83:16",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446982",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3446982",
abstract = "In e-commerce platforms, the online descriptive
information of products shows significant impacts on
the purchase behaviors. To attract potential buyers for
product promotion, numerous workers are employed to
write the impressive product descriptions. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "83",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Guo:2021:AIM,
author = "Jianxiong Guo and Weili Wu",
title = "Adaptive Influence Maximization: If Influential Node
Unwilling to Be the Seed",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "84:1--84:23",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447396",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447396",
abstract = "Influence maximization problem attempts to find a
small subset of nodes that makes the expected influence
spread maximized, which has been researched intensively
before. They all assumed that each user in the seed set
we select is activated successfully \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "84",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cheng:2021:DEB,
author = "Weiyu Cheng and Yanyan Shen and Linpeng Huang and
Yanmin Zhu",
title = "Dual-Embedding based Deep Latent Factor Models for
Recommendation",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "85:1--85:24",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447395",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447395",
abstract = "Among various recommendation methods, latent factor
models are usually considered to be state-of-the-art
techniques, which aim to learn user and item embeddings
for predicting user-item preferences. When applying
latent factor models to the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "85",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sharma:2021:STL,
author = "Shalini Sharma and Angshul Majumdar",
title = "Sequential Transform Learning",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "86:1--86:18",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447394",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447394",
abstract = "This work proposes a new approach for dynamical
modeling; we call it sequential transform learning.
This is loosely based on the transform (analysis
dictionary) learning formulation. This is the first
work on this topic. Transform learning, was \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "86",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:SAS,
author = "Kai Liu and Hongbo Liu and Tomas E. Ward and Hua Wang
and Yu Yang and Bo Zhang and Xindong Wu",
title = "Self-Adaptive Skeleton Approaches to Detect
Self-Organized Coalitions From Brain Functional
Networks Through Probabilistic Mixture Models",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "87:1--87:26",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447570",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447570",
abstract = "Detecting self-organized coalitions from functional
networks is one of the most important ways to uncover
functional mechanisms in the brain. Determining these
raises well-known technical challenges in terms of
scale imbalance, outliers and hard-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "87",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ling:2021:DGM,
author = "Xiang Ling and Lingfei Wu and Saizhuo Wang and Gaoning
Pan and Tengfei Ma and Fangli Xu and Alex X. Liu and
Chunming Wu and Shouling Ji",
title = "Deep Graph Matching and Searching for Semantic Code
Retrieval",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "88:1--88:21",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447571",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447571",
abstract = "Code retrieval is to find the code snippet from a
large corpus of source code repositories that highly
matches the query of natural language description.
Recent work mainly uses natural language processing
techniques to process both query texts (i.e.,
\ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "88",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Peng:2021:SSE,
author = "Hao Peng and Jianxin Li and Yangqiu Song and Renyu
Yang and Rajiv Ranjan and Philip S. Yu and Lifang He",
title = "Streaming Social Event Detection and Evolution
Discovery in Heterogeneous Information Networks",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "89:1--89:33",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447585",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447585",
abstract = "Events are happening in real world and real time,
which can be planned and organized for occasions, such
as social gatherings, festival celebrations,
influential meetings, or sports activities. Social
media platforms generate a lot of real-time text
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "89",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yue:2021:EBC,
author = "Lin Yue and Hao Shen and Sen Wang and Robert Boots and
Guodong Long and Weitong Chen and Xiaowei Zhao",
title = "Exploring {BCI} Control in Smart Environments:
Intention Recognition Via {EEG} Representation
Enhancement Learning",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "90:1--90:20",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450449",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3450449",
abstract = "The brain-computer interface (BCI) control technology
that utilizes motor imagery to perform the desired
action instead of manual operation will be widely used
in smart environments. However, most of the research
lacks robust feature representation of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "90",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:ADK,
author = "Huawen Liu and Enhui Li and Xinwang Liu and Kaile Su
and Shichao Zhang",
title = "Anomaly Detection With Kernel Preserving Embedding",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "91:1--91:18",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447684",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447684",
abstract = "Similarity representation plays a central role in
increasingly popular anomaly detection techniques,
which have been successfully applied in various
realistic scenes. Until now, many low-rank
representation techniques have been introduced to
measure the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "91",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:MMG,
author = "Bo Liu and Xi He and Mingdong Song and Jiangqiang Li
and Guangzhi Qu and Jianlei Lang and Rentao Gu",
title = "A Method for Mining {Granger} Causality Relationship
on Atmospheric Visibility",
journal = j-TKDD,
volume = "15",
number = "5",
pages = "92:1--92:16",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447681",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Jun 29 08:31:04 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447681",
abstract = "Atmospheric visibility is an indicator of atmospheric
transparency and its range directly reflects the
quality of the atmospheric environment. With the
acceleration of industrialization and urbanization, the
natural environment has suffered some \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "92",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Paul:2021:MOC,
author = "Dipanjyoti Paul and Rahul Kumar and Sriparna Saha and
Jimson Mathew",
title = "Multi-objective Cuckoo Search-based Streaming Feature
Selection for Multi-label Dataset",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "93:1--93:24",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447586",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447586",
abstract = "The feature selection method is the process of
selecting only relevant features by removing irrelevant
or redundant features amongst the large number of
features that are used to represent data. Nowadays,
many application domains especially social media
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "93",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Coro:2021:LRS,
author = "Federico Cor{\'o} and Gianlorenzo D'Angelo and Yllka
Velaj",
title = "Link Recommendation for Social Influence
Maximization",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "94:1--94:23",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3449023",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3449023",
abstract = "Social link recommendation systems, like
``People-you-may-know'' on Facebook, ``Who-to-follow''
on Twitter, and ``Suggested-Accounts'' on Instagram
assist the users of a social network in establishing
new connections with other users. While these systems
are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "94",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gao:2021:TPO,
author = "Xiaofeng Gao and Wenyi Xu and Mingding Liao and Guihai
Chen",
title = "Trust Prediction for Online Social Networks with
Integrated Time-Aware Similarity",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "95:1--95:30",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447682",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447682",
abstract = "Online social networks gain increasing popularity in
recent years. In online social networks, trust
prediction is significant for recommendations of high
reputation users as well as in many other applications.
In the literature, trust prediction problem \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "95",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bressan:2021:FMC,
author = "Marco Bressan and Stefano Leucci and Alessandro
Panconesi",
title = "Faster Motif Counting via Succinct Color Coding and
Adaptive Sampling",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "96:1--96:27",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447397",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447397",
abstract = "We address the problem of computing the distribution
of induced connected subgraphs, aka graphlets or
motifs, in large graphs. The current state-of-the-art
algorithms estimate the motif counts via uniform
sampling by leveraging the color coding technique
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "96",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zeng:2021:FRD,
author = "Shaoning Zeng and Bob Zhang and Jianping Gou and Yong
Xu and Wei Huang",
title = "Fast and Robust Dictionary-based Classification for
Image Data",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "97:1--97:22",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3449360",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3449360",
abstract = "Dictionary-based classification has been promising in
knowledge discovery from image data, due to its good
performance and interpretable theoretical system.
Dictionary learning effectively supports both small-
and large-scale datasets, while its \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "97",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2021:SEN,
author = "Chenglin Li and Carrie Lu Tong and Di Niu and Bei
Jiang and Xiao Zuo and Lei Cheng and Jian Xiong and
Jianming Yang",
title = "Similarity Embedding Networks for Robust Human
Activity Recognition",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "98:1--98:17",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448021",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3448021",
abstract = "Deep learning models for human activity recognition
(HAR) based on sensor data have been heavily studied
recently. However, the generalization ability of deep
models on complex real-world HAR data is limited by the
availability of high-quality labeled \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "98",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Koley:2021:DEE,
author = "Paramita Koley and Avirup Saha and Sourangshu
Bhattacharya and Niloy Ganguly and Abir De",
title = "Demarcating Endogenous and Exogenous Opinion Dynamics:
an Experimental Design Approach",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "99:1--99:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3449361",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3449361",
abstract = "The networked opinion diffusion in online social
networks is often governed by the two genres of
opinions---endogenous opinions that are driven by the
influence of social contacts among users, and exogenous
opinions which are formed by external effects like
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "99",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Joaristi:2021:SGF,
author = "Mikel Joaristi and Edoardo Serra",
title = "{SIR-GN}: a Fast Structural Iterative Representation
Learning Approach For Graph Nodes",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "100:1--100:39",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450315",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3450315",
abstract = "Graph representation learning methods have attracted
an increasing amount of attention in recent years.
These methods focus on learning a numerical
representation of the nodes in a graph. Learning these
representations is a powerful instrument for tasks
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "100",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2021:LGN,
author = "Man Wu and Shirui Pan and Lan Du and Xingquan Zhu",
title = "Learning Graph Neural Networks with Positive and
Unlabeled Nodes",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "101:1--101:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450316",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3450316",
abstract = "Graph neural networks (GNNs) are important tools for
transductive learning tasks, such as node
classification in graphs, due to their expressive power
in capturing complex interdependency between nodes. To
enable GNN learning, existing works typically
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "101",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2021:NNS,
author = "Dongsheng Li and Haodong Liu and Chao Chen and
Yingying Zhao and Stephen M. Chu and Bo Yang",
title = "{NeuSE}: a Neural Snapshot Ensemble Method for
Collaborative Filtering",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "102:1--102:20",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450526",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3450526",
abstract = "In collaborative filtering (CF) algorithms, the
optimal models are usually learned by globally
minimizing the empirical risks averaged over all the
observed data. However, the global models are often
obtained via a performance tradeoff among users/items,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "102",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Deng:2021:PUT,
author = "Jinliang Deng and Xiusi Chen and Zipei Fan and Renhe
Jiang and Xuan Song and Ivor W. Tsang",
title = "The Pulse of Urban Transport: Exploring the
Co-evolving Pattern for Spatio-temporal Forecasting",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "103:1--103:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450528",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3450528",
abstract = "Transportation demand forecasting is a topic of large
practical value. However, the model that fits the
demand of one transportation by only considering the
historical data of its own could be vulnerable since
random fluctuations could easily impact the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "103",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:HCD,
author = "Yashen Wang and Huanhuan Zhang and Zhirun Liu and
Qiang Zhou",
title = "Hierarchical Concept-Driven Language Model",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "104:1--104:22",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451167",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451167",
abstract = "For guiding natural language generation, many
semantic-driven methods have been proposed. While
clearly improving the performance of the end-to-end
training task, these existing semantic-driven methods
still have clear limitations: for example, (i) they
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "104",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Alarte:2021:PLM,
author = "Juli{\'a}n Alarte and Josep Silva",
title = "Page-Level Main Content Extraction From Heterogeneous
{Webpages}",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "105:1--105:??",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451168",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451168",
abstract = "The main content of a webpage is often surrounded by
other boilerplate elements related to the template,
such as menus, advertisements, copyright notices, and
comments. For crawlers and indexers, isolating the main
content from the template and other \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "105",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Nettasinghe:2021:MLE,
author = "Buddhika Nettasinghe and Vikram Krishnamurthy",
title = "Maximum Likelihood Estimation of Power-law Degree
Distributions via Friendship Paradox-based Sampling",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "106:1--106:28",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451166",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451166",
abstract = "This article considers the problem of estimating a
power-law degree distribution of an undirected network
using sampled data. Although power-law degree
distributions are ubiquitous in nature, the widely used
parametric methods for estimating them (e.g.,
\ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "106",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Anaissi:2021:OTB,
author = "Ali Anaissi and Basem Suleiman and Seid Miad Zandavi",
title = "Online Tensor-Based Learning Model for Structural
Damage Detection",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "107:1--107:18",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451217",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451217",
abstract = "The online analysis of multi-way data stored in a
tensor has become an essential tool for capturing the
underlying structures and extracting the sensitive
features that can be used to learn a predictive model.
However, data distributions often evolve \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "107",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:MIM,
author = "Rui Wang and Yongkun Li and Shuai Lin and Hong Xie and
Yinlong Xu and John C. S. Lui",
title = "On Modeling Influence Maximization in Social Activity
Networks under General Settings",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "108:1--108:28",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451218",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451218",
abstract = "Finding the set of most influential users in online
social networks (OSNs) to trigger the largest influence
cascade is meaningful, e.g., companies may leverage the
``word-of-mouth'' effect to trigger a large cascade of
purchases by offering free samples/\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "108",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2021:ICD,
author = "Zhe Chen and Aixin Sun and Xiaokui Xiao",
title = "Incremental Community Detection on Large Complex
Attributed Network",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "109:1--109:20",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451216",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451216",
abstract = "Community detection on network data is a fundamental
task, and has many applications in industry. Network
data in industry can be very large, with incomplete and
complex attributes, and more importantly, growing. This
calls for a community detection \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "109",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xia:2021:GDD,
author = "Tong Xia and Junjie Lin and Yong Li and Jie Feng and
Pan Hui and Funing Sun and Diansheng Guo and Depeng
Jin",
title = "{$3$DGCN}: {$3$-Dimensional} Dynamic Graph
Convolutional Network for Citywide Crowd Flow
Prediction",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "110:1--110:21",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451394",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451394",
abstract = "Crowd flow prediction is an essential task benefiting
a wide range of applications for the transportation
system and public safety. However, it is a challenging
problem due to the complex spatio-temporal dependence
and the complicated impact of urban \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "110",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2021:FBN,
author = "Kai Liu and Xiangyu Li and Zhihui Zhu and Lodewijk
Brand and Hua Wang",
title = "Factor-Bounded Nonnegative Matrix Factorization",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "111:1--111:18",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451395",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451395",
abstract = "Nonnegative Matrix Factorization (NMF) is broadly used
to determine class membership in a variety of
clustering applications. From movie recommendations and
image clustering to visual feature extractions, NMF has
applications to solve a large number of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "111",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2021:ACA,
author = "Huandong Wang and Yong Li and Mu Du and Zhenhui Li and
Depeng Jin",
title = "{App2Vec}: Context-Aware Application Usage
Prediction",
journal = j-TKDD,
volume = "15",
number = "6",
pages = "112:1--112:21",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451396",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Jul 21 07:02:35 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451396",
abstract = "Both app developers and service providers have strong
motivations to understand when and where certain apps
are used by users. However, it has been a challenging
problem due to the highly skewed and noisy app usage
data. Moreover, apps are regarded as \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "112",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:DLV,
author = "Fenglin Liu and Xian Wu and Shen Ge and Xuancheng Ren
and Wei Fan and Xu Sun and Yuexian Zou",
title = "{DiMBERT}: Learning Vision-Language Grounded
Representations with Disentangled
Multimodal-Attention",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "1:1--1:19",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447685",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447685",
abstract = "Vision-and-language (V-L) tasks require the system to
understand both vision content and natural language,
thus learning fine-grained joint representations of
vision and language (a.k.a. V-L representations) is of
paramount importance. Recently, various \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gao:2022:CDR,
author = "Chen Gao and Yong Li and Fuli Feng and Xiangning Chen
and Kai Zhao and Xiangnan He and Depeng Jin",
title = "Cross-domain Recommendation with Bridge-Item
Embeddings",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "2:1--2:23",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447683",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3447683",
abstract = "Web systems that provide the same functionality
usually share a certain amount of items. This makes it
possible to combine data from different websites to
improve recommendation quality, known as the
cross-domain recommendation task. Despite many research
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2022:LVA,
author = "Luyue Lin and Xin Zheng and Bo Liu and Wei Chen and
Yanshan Xiao",
title = "A Latent Variable Augmentation Method for Image
Categorization with Insufficient Training Samples",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "3:1--3:35",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451165",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451165",
abstract = "Over the past few years, we have made great progress
in image categorization based on convolutional neural
networks (CNNs). These CNNs are always trained based on
a large-scale image data set; however, people may only
have limited training samples for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gao:2022:GBS,
author = "Jianliang Gao and Xiaoting Ying and Cong Xu and
Jianxin Wang and Shichao Zhang and Zhao Li",
title = "Graph-Based Stock Recommendation by Time-Aware
Relational Attention Network",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "4:1--4:21",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451397",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451397",
abstract = "The stock market investors aim at maximizing their
investment returns. Stock recommendation task is to
recommend stocks with higher return ratios for the
investors. Most stock prediction methods study the
historical sequence patterns to predict stock
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2022:MML,
author = "Yaojin Lin and Qinghua Hu and Jinghua Liu and Xingquan
Zhu and Xindong Wu",
title = "{MULFE}: Multi-Label Learning via Label-Specific
Feature Space Ensemble",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "5:1--5:24",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451392",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451392",
abstract = "In multi-label learning, label correlations commonly
exist in the data. Such correlation not only provides
useful information, but also imposes significant
challenges for multi-label learning. Recently,
label-specific feature embedding has been proposed
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2022:JPF,
author = "Fandel Lin and Hsun-Ping Hsieh",
title = "A Joint Passenger Flow Inference and Path Recommender
System for Deploying New Routes and Stations of Mass
Transit Transportation",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "6:1--6:36",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451393",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451393",
abstract = "In this work, a novel decision assistant system for
urban transportation, called Route Scheme Assistant
(RSA), is proposed to address two crucial issues that
few former researches have focused on: route-based
passenger flow (PF) inference and multivariant
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:BAM,
author = "Huafeng Liu and Liping Jing and Jingxuan Wen and
Pengyu Xu and Jian Yu and Michael K. Ng",
title = "{Bayesian} Additive Matrix Approximation for Social
Recommendation",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "7:1--7:34",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451391",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451391",
abstract = "Social relations between users have been proven to be
a good type of auxiliary information to improve the
recommendation performance. However, it is a
challenging issue to sufficiently exploit the social
relations and correctly determine the user \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "7",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Guo:2022:RCT,
author = "Jinjin Guo and Longbing Cao and Zhiguo Gong",
title = "Recurrent Coupled Topic Modeling over Sequential
Documents",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "8:1--8:32",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451530",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451530",
abstract = "The abundant sequential documents such as online
archival, social media, and news feeds are streamingly
updated, where each chunk of documents is incorporated
with smoothly evolving yet dependent topics. Such
digital texts have attracted extensive \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "8",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Guo:2022:DLD,
author = "Yunyan Guo and Jianzhong Li",
title = "Distributed Latent {Dirichlet} Allocation on Streams",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "9:1--9:20",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451528",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451528",
abstract = "Latent Dirichlet Allocation (LDA) has been widely used
for topic modeling, with applications spanning various
areas such as natural language processing and
information retrieval. While LDA on small and static
datasets has been extensively studied, several
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "9",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Han:2022:EAI,
author = "Juhee Han and Younghoon Lee",
title = "Explainable Artificial Intelligence-Based Competitive
Factor Identification",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "10:1--10:11",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451529",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451529",
abstract = "Competitor analysis is an essential component of
corporate strategy, providing both offensive and
defensive strategic contexts to identify opportunities
and threats. The rapid development of social media has
recently led to several methodologies and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "10",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Laishram:2022:MEA,
author = "Ricky Laishram and Jeremy D. Wendt and Sucheta
Soundarajan",
title = "{MCS+}: an Efficient Algorithm for Crawling the
Community Structure in Multiplex Networks",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "11:1--11:32",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451527",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451527",
abstract = "In this article, we consider the problem of crawling a
multiplex network to identify the community structure
of a layer-of-interest. A multiplex network is one
where there are multiple types of relationships between
the nodes. In many multiplex networks, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "11",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:GML,
author = "Lichen Wang and Zhengming Ding and Yun Fu",
title = "Generic Multi-label Annotation via Adaptive Graph and
Marginalized Augmentation",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "12:1--12:20",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451884",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3451884",
abstract = "Multi-label learning recovers multiple labels from a
single instance. It is a more challenging task compared
with single-label manner. Most multi-label learning
approaches need large-scale well-labeled samples to
achieve high accurate performance. However, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "12",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Moreo:2022:LTT,
author = "Alejandro Moreo and Andrea Esuli and Fabrizio
Sebastiani",
title = "Lost in Transduction: Transductive Transfer Learning
in Text Classification",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "13:1--13:21",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453146",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3453146",
abstract = "Obtaining high-quality labelled data for training a
classifier in a new application domain is often costly.
Transfer Learning (a.k.a. ``Inductive Transfer'') tries
to alleviate these costs by transferring, to the
``target'' domain of interest, knowledge \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "13",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2022:MTP,
author = "Yangfan Li and Kenli Li and Cen Chen and Xu Zhou and
Zeng Zeng and Keqin Li",
title = "Modeling Temporal Patterns with Dilated Convolutions
for Time-Series Forecasting",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "14:1--14:22",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453724",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3453724",
abstract = "Time-series forecasting is an important problem across
a wide range of domains. Designing accurate and prompt
forecasting algorithms is a non-trivial task, as
temporal data that arise in real applications often
involve both non-linear dynamics and linear \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "14",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2022:CCP,
author = "Keyu Yang and Yunjun Gao and Lei Liang and Song Bian
and Lu Chen and Baihua Zheng",
title = "{CrowdTC}: Crowd-powered Learning for Text
Classification",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "15:1--15:23",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457216",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3457216",
abstract = "Text classification is a fundamental task in content
analysis. Nowadays, deep learning has demonstrated
promising performance in text classification compared
with shallow models. However, almost all the existing
models do not take advantage of the wisdom \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "15",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:JMH,
author = "Haobing Liu and Yanmin Zhu and Tianzi Zang and Yanan
Xu and Jiadi Yu and Feilong Tang",
title = "Jointly Modeling Heterogeneous Student Behaviors and
Interactions among Multiple Prediction Tasks",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "16:1--16:24",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458023",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3458023",
abstract = "Prediction tasks about students have practical
significance for both student and college. Making
multiple predictions about students is an important
part of a smart campus. For instance, predicting
whether a student will fail to graduate can alert the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "16",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lee:2022:MMS,
author = "Wu Lee and Yuliang Shi and Hongfeng Sun and Lin Cheng
and Kun Zhang and Xinjun Wang and Zhiyong Chen",
title = "{MSIPA}: Multi-Scale Interval Pattern-Aware Network
for {ICU} Transfer Prediction",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "17:1--17:17",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458284",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3458284",
abstract = "Accurate prediction of patients' ICU transfer events
is of great significance for improving ICU treatment
efficiency. ICU transition prediction task based on
Electronic Health Records (EHR) is a temporal mining
task like many other health informatics \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "17",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:BSF,
author = "Min-Ling Zhang and Jun-Peng Fang and Yi-Bo Wang",
title = "{BiLabel}-Specific Features for Multi-Label
Classification",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "18:1--18:23",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458283",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3458283",
abstract = "In multi-label classification, the task is to induce
predictive models which can assign a set of relevant
labels for the unseen instance. The strategy of
label-specific features has been widely employed in
learning from multi-label examples, where the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "18",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:NMV,
author = "Bo Liu and Haowen Zhong and Yanshan Xiao",
title = "New Multi-View Classification Method with Uncertain
Data",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "19:1--19:23",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458282",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3458282",
abstract = "Multi-view classification aims at designing a
multi-view learning strategy to train a classifier from
multi-view data, which are easily collected in
practice. Most of the existing works focus on
multi-view classification by assuming the multi-view
data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "19",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Na:2022:USE,
author = "Gyoung S. Na and Hyunju Chang",
title = "Unsupervised Subspace Extraction via Deep Kernelized
Clustering",
journal = j-TKDD,
volume = "16",
number = "1",
pages = "20:1--20:15",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3459082",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:39 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3459082",
abstract = "Feature extraction has been widely studied to find
informative latent features and reduce the
dimensionality of data. In particular, due to the
difficulty in obtaining labeled data, unsupervised
feature extraction has received much attention in data
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "20",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Aggarwal:2022:CEC,
author = "Charu C. Aggarwal",
title = "Communication from the {Editor-in-Chief}: State of the
{{\booktitle{ACM Transactions on Knowledge Discovery
from Data}}}",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "21e:1--21e:2",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3463950",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3463950",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "21e",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:SUM,
author = "Chunkai Zhang and Zilin Du and Yuting Yang and
Wensheng Gan and Philip S. Yu",
title = "On-Shelf Utility Mining of Sequence Data",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "21:1--21:31",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457570",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3457570",
abstract = "Utility mining has emerged as an important and
interesting topic owing to its wide application and
considerable popularity. However, conventional utility
mining methods have a bias toward items that have
longer on-shelf time as they have a greater chance
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "21",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tran:2022:CDP,
author = "Cong Tran and Won-Yong Shin and Andreas Spitz",
title = "Community Detection in Partially Observable Social
Networks",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "22:1--22:24",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461339",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3461339",
abstract = "The discovery of community structures in social
networks has gained significant attention since it is a
fundamental problem in understanding the networks'
topology and functions. However, most social network
data are collected from partially observable \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "22",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2022:CDL,
author = "Zhao Li and Junshuai Song and Zehong Hu and Zhen Wang
and Jun Gao",
title = "Constrained Dual-Level Bandit for Personalized
Impression Regulation in Online Ranking Systems",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "23:1--23:23",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461340",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3461340",
abstract = "Impression regulation plays an important role in
various online ranking systems, e.g., e-commerce
ranking systems always need to achieve local commercial
demands on some pre-labeled target items like fresh
item cultivation and fraudulent item \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "23",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ohare:2022:HVT,
author = "Kevin O'Hare and Anna Jurek-Loughrey and Cassio {De
Campos}",
title = "High-Value Token-Blocking: Efficient Blocking Method
for Record Linkage",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "24:1--24:17",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450527",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3450527",
abstract = "Data integration is an important component of Big Data
analytics. One of the key challenges in data
integration is record linkage, that is, matching
records that represent the same real-world entity.
Because of computational costs, methods referred to as
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "24",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ding:2022:ESU,
author = "Ming Ding and Tianyu Wang and Xudong Wang",
title = "Establishing Smartphone User Behavior Model Based on
Energy Consumption Data",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "25:1--25:40",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461459",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3461459",
abstract = "In smartphone data analysis, both energy consumption
modeling and user behavior mining have been explored
extensively, but the relationship between energy
consumption and user behavior has been rarely studied.
Such a relationship is explored over large-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "25",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Saude:2022:RRB,
author = "Jo{\~a}o Sa{\'u}de and Guilherme Ramos and Ludovico
Boratto and Carlos Caleiro",
title = "A Robust Reputation-Based Group Ranking System and Its
Resistance to Bribery",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "26:1--26:35",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3462210",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3462210",
abstract = "The spread of online reviews and opinions and its
growing influence on people's behavior and decisions
boosted the interest to extract meaningful information
from this data deluge. Hence, crowdsourced ratings of
products and services gained a critical \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "26",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2022:EHG,
author = "Hanlu Wu and Tengfei Ma and Lingfei Wu and Fangli Xu
and Shouling Ji",
title = "Exploiting Heterogeneous Graph Neural Networks with
Latent Worker\slash Task Correlation Information for
Label Aggregation in Crowdsourcing",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "27:1--27:18",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460865",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3460865",
abstract = "Crowdsourcing has attracted much attention for its
convenience to collect labels from non-expert workers
instead of experts. However, due to the high level of
noise from the non-experts, a label aggregation model
that infers the true label from noisy \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "27",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2022:MNV,
author = "Hui-Jia Li and Lin Wang and Zhan Bu and Jie Cao and
Yong Shi",
title = "Measuring the Network Vulnerability Based on {Markov}
Criticality",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "28:1--28:24",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464390",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3464390",
abstract = "Vulnerability assessment---a critical issue for
networks---attempts to foresee unexpected destructive
events or hostile attacks in the whole system. In this
article, we consider a new Markov global connectivity
metric---Kemeny constant, and take its derivative
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "28",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:SBA,
author = "Guangtao Wang and Gao Cong and Ying Zhang and Zhen Hai
and Jieping Ye",
title = "A Synopsis Based Approach for Itemset Frequency
Estimation over Massive Multi-Transaction Stream",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "29:1--29:30",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465238",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3465238",
abstract = "The streams where multiple transactions are associated
with the same key are prevalent in practice, e.g., a
customer has multiple shopping records arriving at
different time. Itemset frequency estimation on such
streams is very challenging since sampling \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "29",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yin:2022:WFM,
author = "Jianfei Yin and Ruili Wang and Yeqing Guo and Yizhe
Bai and Shunda Ju and Weili Liu and Joshua Zhexue
Huang",
title = "Wealth Flow Model: Online Portfolio Selection Based on
Learning Wealth Flow Matrices",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "30:1--30:27",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464308",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3464308",
abstract = "This article proposes a deep learning solution to the
online portfolio selection problem based on learning a
latent structure directly from a price time series. It
introduces a novel wealth flow matrix for representing
a latent structure that has special \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "30",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hidalgo:2022:DAD,
author = "Juan I. G. Hidalgo and Silas G. T. C. Santos and
Roberto S. M. Barros",
title = "Dynamically Adjusting Diversity in Ensembles for the
Classification of Data Streams with Concept Drift",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "31:1--31:20",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3466616",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3466616",
abstract = "A data stream can be defined as a system that
continually generates a lot of data over time. Today,
processing data streams requires new demands and
challenging tasks in the data mining and machine
learning areas. Concept Drift is a problem commonly
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "31",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cantini:2022:LSH,
author = "Riccardo Cantini and Fabrizio Marozzo and Giovanni
Bruno and Paolo Trunfio",
title = "Learning Sentence-to-Hashtags Semantic Mapping for
Hashtag Recommendation on Microblogs",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "32:1--32:26",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3466876",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3466876",
abstract = "The growing use of microblogging platforms is
generating a huge amount of posts that need effective
methods to be classified and searched. In Twitter and
other social media platforms, hashtags are exploited by
users to facilitate the search, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "32",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Abebe:2022:ODO,
author = "Rediet Abebe and T.-H. Hubert Chan and Jon Kleinberg
and Zhibin Liang and David Parkes and Mauro Sozio and
Charalampos E. Tsourakakis",
title = "Opinion Dynamics Optimization by Varying
Susceptibility to Persuasion via Non-Convex Local
Search",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "33:1--33:34",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3466617",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3466617",
abstract = "A long line of work in social psychology has studied
variations in people's susceptibility to persuasion---the
extent to which they are willing to modify their
opinions on a topic. This body of literature suggests
an interesting perspective on theoretical \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "33",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2022:SHS,
author = "Yang Yang and Hongchen Wei and Zhen-Qiang Sun and
Guang-Yu Li and Yuanchun Zhou and Hui Xiong and Jian
Yang",
title = "{S2OSC}: a Holistic Semi-Supervised Approach for Open
Set Classification",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "34:1--34:27",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3468675",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3468675",
abstract = "Open set classification (OSC) tackles the problem of
determining whether the data are in-class or
out-of-class during inference, when only provided with
a set of in-class examples at training time.
Traditional OSC methods usually train discriminative or
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "34",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:EHI,
author = "Yiding Zhang and Xiao Wang and Nian Liu and Chuan
Shi",
title = "Embedding Heterogeneous Information Network in
Hyperbolic Spaces",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "35:1--35:23",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3468674",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3468674",
abstract = "Heterogeneous information network (HIN) embedding,
aiming to project HIN into a low-dimensional space, has
attracted considerable research attention. Most of the
existing HIN embedding methods focus on preserving the
inherent network structure and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "35",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:CCN,
author = "Xueyuan Wang and Hongpo Zhang and Zongmin Wang and
Yaqiong Qiao and Jiangtao Ma and Honghua Dai",
title = "{Con\&Net}: a Cross-Network Anchor Link Discovery
Method Based on Embedding Representation",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "36:1--36:18",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3469083",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3469083",
abstract = "Cross-network anchor link discovery is an important
research problem and has many applications in
heterogeneous social network. Existing schemes of
cross-network anchor link discovery can provide
reasonable link discovery results, but the quality of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "36",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:HVA,
author = "Hangbin Zhang and Raymond K. Wong and Victor W. Chu",
title = "Hybrid Variational Autoencoder for Recommender
Systems",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "37:1--37:37",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470659",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3470659",
abstract = "E-commerce platforms heavily rely on automatic
personalized recommender systems, e.g., collaborative
filtering models, to improve customer experience. Some
hybrid models have been proposed recently to address
the deficiency of existing models. However, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "37",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Oliveira:2022:ALS,
author = "Lucas {Santos De Oliveira} and Pedro O. S. Vaz-De-Melo
and Aline {Carneiro Viana}",
title = "Assessing Large-Scale Power Relations among Locations
from Mobility Data",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "38:1--38:31",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470770",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3470770",
abstract = "The pervasiveness of smartphones has shaped our lives,
social norms, and the structure that dictates human
behavior. They now directly influence how individuals
demand resources or interact with network services.
From this scenario, identifying key \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "38",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:KKR,
author = "Zhenyu Zhang and Lei Zhang and Dingqi Yang and Liu
Yang",
title = "{KRAN}: Knowledge Refining Attention Network for
Recommendation",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "39:1--39:20",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470783",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3470783",
abstract = "Recommender algorithms combining knowledge graph and
graph convolutional network are becoming more and more
popular recently. Specifically, attributes describing
the items to be recommended are often used as
additional information. These attributes along
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "39",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2022:STE,
author = "Liang Zhao and Yuyang Gao and Jieping Ye and Feng Chen
and Yanfang Ye and Chang-Tien Lu and Naren
Ramakrishnan",
title = "Spatio-Temporal Event Forecasting Using Incremental
Multi-Source Feature Learning",
journal = j-TKDD,
volume = "16",
number = "2",
pages = "40:1--40:28",
month = apr,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464976",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Sep 14 07:09:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3464976",
abstract = "The forecasting of significant societal events such as
civil unrest and economic crisis is an interesting and
challenging problem which requires both timeliness,
precision, and comprehensiveness. Significant societal
events are influenced and indicated \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "40",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lei:2022:ODR,
author = "Shuo Lei and Xuchao Zhang and Liang Zhao and Arnold P.
Boedihardjo and Chang-Tien Lu",
title = "Online and Distributed Robust Regressions with
Extremely Noisy Labels",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "41:1--41:24",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3473038",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3473038",
abstract = "In today's era of big data, robust least-squares
regression becomes a more challenging problem when
considering the extremely corrupted labels along with
explosive growth of datasets. Traditional robust
methods can handle the noise but suffer from several
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "41",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2022:KDA,
author = "Xingjian Li and Haoyi Xiong and Zeyu Chen and Jun Huan
and Ji Liu and Cheng-Zhong Xu and Dejing Dou",
title = "Knowledge Distillation with Attention for Deep
Transfer Learning of Convolutional Networks",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "42:1--42:20",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3473912",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3473912",
abstract = "Transfer learning through fine-tuning a pre-trained
neural network with an extremely large dataset, such as
ImageNet, can significantly improve and accelerate
training while the accuracy is frequently bottlenecked
by the limited dataset size of the new \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "42",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Nashaat:2022:SSE,
author = "Mona Nashaat and Aindrila Ghosh and James Miller and
Shaikh Quader",
title = "Semi-Supervised Ensemble Learning for Dealing with
Inaccurate and Incomplete Supervision",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "43:1--43:33",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3473910",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3473910",
abstract = "In real-world tasks, obtaining a large set of
noise-free data can be prohibitively expensive.
Therefore, recent research tries to enable machine
learning to work with weakly supervised datasets, such
as inaccurate or incomplete data. However, the previous
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "43",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shao:2022:NEM,
author = "Ping Shao and Yang Yang and Shengyao Xu and Chunping
Wang",
title = "Network Embedding via Motifs",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "44:1--44:20",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3473911",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3473911",
abstract = "Network embedding has emerged as an effective way to
deal with downstream tasks, such as node classification
[16, 31, 42]. Most existing methods leverage
multi-similarities between nodes such as connectivity,
which considers vertices that are closely \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "44",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Kuang:2022:BSS,
author = "Kun Kuang and Hengtao Zhang and Runze Wu and Fei Wu
and Yueting Zhuang and Aijun Zhang",
title = "Balance-Subsampled Stable Prediction Across Unknown
Test Data",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "45:1--45:21",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477052",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477052",
abstract = "In data mining and machine learning, it is commonly
assumed that training and test data share the same
population distribution. However, this assumption is
often violated in practice because of the sample
selection bias, which might induce the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "45",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2022:DDG,
author = "Ling Chen and Xing Tang and Weiqi Chen and Yuntao Qian
and Yansheng Li and Yongjun Zhang",
title = "{DACHA}: a Dual Graph Convolution Based Temporal
Knowledge Graph Representation Learning Method Using
Historical Relation",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "46:1--46:18",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477051",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477051",
abstract = "Temporal knowledge graph (TKG) representation learning
embeds relations and entities into a continuous
low-dimensional vector space by incorporating temporal
information. Latest studies mainly aim at learning
entity representations by modeling entity \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "46",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:CAS,
author = "Huandong Wang and Yong Li and Junjie Lin and Hancheng
Cao and Depeng Jin",
title = "Context-Aware Semantic Annotation of Mobility
Records",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "47:1--47:20",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477048",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477048",
abstract = "The wide adoption of mobile devices has provided us
with a massive volume of human mobility records.
However, a large portion of these records is unlabeled,
i.e., only have GPS coordinates without semantic
information (e.g., Point of Interest (POI)). To
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "47",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shi:2022:CLC,
author = "Tian Shi and Xuchao Zhang and Ping Wang and Chandan K.
Reddy",
title = "Corpus-level and Concept-based Explanations for
Interpretable Document Classification",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "48:1--48:17",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477539",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477539",
abstract = "Using attention weights to identify information that
is important for models' decision making is a popular
approach to interpret attention-based neural networks.
This is commonly realized in practice through the
generation of a heat-map for every single \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "48",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Feng:2022:CAS,
author = "Jie Feng and Yong Li and Ziqian Lin and Can Rong and
Funing Sun and Diansheng Guo and Depeng Jin",
title = "Context-aware Spatial-Temporal Neural Network for
Citywide Crowd Flow Prediction via Modeling Long-range
Spatial Dependency",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "49:1--49:21",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477577",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477577",
abstract = "Crowd flow prediction is of great importance in a wide
range of applications from urban planning, traffic
control to public safety. It aims at predicting the
inflow (the traffic of crowds entering a region in a
given time interval) and outflow (the \ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "49",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhou:2022:UAN,
author = "Yang Zhou and Jiaxiang Ren and Ruoming Jin and Zijie
Zhang and Jingyi Zheng and Zhe Jiang and Da Yan and
Dejing Dou",
title = "Unsupervised Adversarial Network Alignment with
Reinforcement Learning",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "50:1--50:29",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477050",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477050",
abstract = "Network alignment, which aims at learning a matching
between the same entities across multiple information
networks, often suffers challenges from feature
inconsistency, high-dimensional features, to unstable
alignment results. This article presents a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "50",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2022:NMN,
author = "Youxi Wu and Lanfang Luo and Yan Li and Lei Guo and
Philippe Fournier-Viger and Xingquan Zhu and Xindong
Wu",
title = "{NTP-Miner}: Nonoverlapping Three-Way Sequential
Pattern Mining",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "51:1--51:21",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3480245",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3480245",
abstract = "Nonoverlapping sequential pattern mining is an
important type of sequential pattern mining (SPM) with
gap constraints, which not only can reveal interesting
patterns to users but also can effectively reduce the
search space using the Apriori (anti-\ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "51",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2022:NPO,
author = "Yuanchun Jiang and Ruicheng Liang and Ji Zhang and
Jianshan Sun and Yezheng Liu and Yang Qian",
title = "Network Public Opinion Detection During the
Coronavirus Pandemic: a Short-Text Relational Topic
Model",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "52:1--52:27",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3480246",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3480246",
abstract = "Online social media provides rich and varied
information reflecting the significant concerns of the
public during the coronavirus pandemic. Analyzing what
the public is concerned with from social media
information can support policy-makers to maintain the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "52",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2022:GCI,
author = "Heli Sun and Yang Li and Bing Lv and Wujie Yan and
Liang He and Shaojie Qiao and Jianbin Huang",
title = "{Graph Community Infomax}",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "53:1--53:21",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3480244",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3480244",
abstract = "Graph representation learning aims at learning
low-dimension representations for nodes in graphs, and
has been proven very useful in several downstream
tasks. In this article, we propose a new model, Graph
Community Infomax (GCI), that can adversarial
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "53",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:SGQ,
author = "Guliu Liu and Lei Li and Guanfeng Liu and Xindong Wu",
title = "Social Group Query Based on Multi-Fuzzy-Constrained
Strong Simulation",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "54:1--54:27",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3481640",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3481640",
abstract = "Traditional social group analysis mostly uses
interaction models, event models, or other social
network analysis methods to identify and distinguish
groups. This type of method can divide social
participants into different groups based on their
geographic \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "54",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liang:2022:NFB,
author = "Shangsong Liang and Zhuo Ouyang and Zaiqiao Meng",
title = "A Normalizing Flow-Based Co-Embedding Model for
Attributed Networks",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "55:1--55:31",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3477049",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3477049",
abstract = "Network embedding is a technique that aims at
inferring the low-dimensional representations of nodes
in a semantic space. In this article, we study the
problem of inferring the low-dimensional
representations of both nodes and attributes for
attributed \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "55",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2022:TAG,
author = "Yonghui Xu and Shengjie Sun and Huiguo Zhang and
Chang'an Yi and Yuan Miao and Dong Yang and Xiaonan
Meng and Yi Hu and Ke Wang and Huaqing Min and Hengjie
Song and Chuanyan Miao",
title = "Time-Aware Graph Embedding: a Temporal Smoothness and
Task-Oriented Approach",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "56:1--56:23",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3480243",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3480243",
abstract = "Knowledge graph embedding, which aims at learning the
low-dimensional representations of entities and
relationships, has attracted considerable research
efforts recently. However, most knowledge graph
embedding methods focus on the structural \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "56",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sowah:2022:HEH,
author = "Robert A. Sowah and Bernard Kuditchar and Godfrey A.
Mills and Amevi Acakpovi and Raphael A. Twum and Gifty
Buah and Robert Agboyi",
title = "{HCBST}: an Efficient Hybrid Sampling Technique for
Class Imbalance Problems",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "57:1--57:37",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488280",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3488280",
abstract = "Class imbalance problem is prevalent in many
real-world domains. It has become an active area of
research. In binary classification problems, imbalance
learning refers to learning from a dataset with a high
degree of skewness to the negative class. This
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "57",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jin:2022:TUE,
author = "Junchen Jin and Mark Heimann and Di Jin and Danai
Koutra",
title = "Toward Understanding and Evaluating Structural Node
Embeddings",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "58:1--58:32",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3481639",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3481639",
abstract = "While most network embedding techniques model the
proximity between nodes in a network, recently there
has been significant interest in structural embeddings
that are based on node equivalences, a notion rooted in
sociology: equivalences or positions are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "58",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Guo:2022:DFE,
author = "Mengzhuo Guo and Zhongzhi Xu and Qingpeng Zhang and
Xiuwu Liao and Jiapeng Liu",
title = "Deciphering Feature Effects on Decision-Making in
Ordinal Regression Problems: an Explainable Ordinal
Factorization Model",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "59:1--59:26",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487048",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3487048",
abstract = "Ordinal regression predicts the objects' labels that
exhibit a natural ordering, which is vital to
decision-making problems such as credit scoring and
clinical diagnosis. In these problems, the ability to
explain how the individual features and their
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "59",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2022:SMH,
author = "Jerry Chun-Wei Lin and Youcef Djenouri and Gautam
Srivastava and Yuanfa Li and Philip S. Yu",
title = "Scalable Mining of High-Utility Sequential Patterns
With Three-Tier {MapReduce} Model",
journal = j-TKDD,
volume = "16",
number = "3",
pages = "60:1--60:26",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487046",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Dec 10 11:04:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3487046",
abstract = "High-utility sequential pattern mining (HUSPM) is a
hot research topic in recent decades since it combines
both sequential and utility properties to reveal more
information and knowledge rather than the traditional
frequent itemset mining or sequential \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "60",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gupta:2022:CDL,
author = "Manish Gupta and Puneet Agrawal",
title = "Compression of Deep Learning Models for Text: a
Survey",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "61:1--61:55",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487045",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3487045",
abstract = "In recent years, the fields of natural language
processing (NLP) and information retrieval (IR) have
made tremendous progress thanks to deep learning models
like Recurrent Neural Networks (RNNs), Gated Recurrent
Units (GRUs) and Long Short-Term Memory \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "61",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:FMM,
author = "Chang Liu and Jie Yan and Feiyue Guo and Min Guo",
title = "Forecasting the Market with Machine Learning
Algorithms: an Application of {NMC-BERT-LSTM-DQN-X}
Algorithm in Quantitative Trading",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "62:1--62:22",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488378",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3488378",
abstract = "Although machine learning (ML) algorithms have been
widely used in forecasting the trend of stock market
indices, they failed to consider the following crucial
aspects for market forecasting: (1) that investors'
emotions and attitudes toward future market \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "62",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:RRH,
author = "Danlu Liu and Yu Li and William Baskett and Dan Lin
and Chi-Ren Shyu",
title = "{RHPTree}---Risk Hierarchical Pattern Tree for Scalable
Long Pattern Mining",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "63:1--63:33",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488380",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3488380",
abstract = "Risk patterns are crucial in biomedical research and
have served as an important factor in precision health
and disease prevention. Despite recent development in
parallel and high-performance computing, existing risk
pattern mining methods still struggle \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "63",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ma:2022:MIF,
author = "Muyang Ma and Pengjie Ren and Zhumin Chen and Zhaochun
Ren and Lifan Zhao and Peiyu Liu and Jun Ma and Maarten
de Rijke",
title = "Mixed Information Flow for Cross-Domain Sequential
Recommendations",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "64:1--64:32",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487331",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3487331",
abstract = "Cross-domain sequential recommendation is the task of
predicting the next item that the user is most likely to
interact with based on past sequential behavior from
multiple domains. One of the key challenges in
cross-domain sequential recommendation is to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "64",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Fu:2022:TTR,
author = "Zhe Fu and Li Yu and Xi Niu",
title = "{TRACE}: Travel Reinforcement Recommendation Based on
Location-Aware Context Extraction",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "65:1--65:22",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487047",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3487047",
abstract = "As the popularity of online travel platforms
increases, users tend to make ad-hoc decisions on
places to visit rather than preparing the detailed tour
plans in advance. Under the situation of timeliness and
uncertainty of users' demand, how to integrate
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "65",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2022:CFS,
author = "Kui Yu and Yajing Yang and Wei Ding",
title = "Causal Feature Selection with Missing Data",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "66:1--66:24",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488055",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3488055",
abstract = "Causal feature selection aims at learning the Markov
blanket (MB) of a class variable for feature selection.
The MB of a class variable implies the local causal
structure among the class variable and its MB and all
other features are probabilistically \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "66",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gao:2022:TES,
author = "Fei Gao and Jiada Li and Yisu Ge and Jianwen Shao and
Shufang Lu and Libo Weng",
title = "A Trajectory Evaluator by Sub-tracks for Detecting
{VOT}-based Anomalous Trajectory",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "67:1--67:19",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3490032",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3490032",
abstract = "With the popularization of visual object tracking
(VOT), more and more trajectory data are obtained and
have begun to gain widespread attention in the fields
of mobile robots, intelligent video surveillance, and
the like. How to clean the anomalous \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "67",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jafariakinabad:2022:SSR,
author = "Fereshteh Jafariakinabad and Kien A. Hua",
title = "A Self-Supervised Representation Learning of Sentence
Structure for Authorship Attribution",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "68:1--68:16",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3491203",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3491203",
abstract = "The syntactic structure of sentences in a document
substantially informs about its authorial writing
style. Sentence representation learning has been widely
explored in recent years and it has been shown that it
improves the generalization of different \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "68",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2022:PPM,
author = "Honghui Xu and Zhipeng Cai and Wei Li",
title = "Privacy-Preserving Mechanisms for Multi-Label Image
Recognition",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "69:1--69:21",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3491231",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3491231",
abstract = "Multi-label image recognition has been an
indispensable fundamental component for many real
computer vision applications. However, a severe threat
of privacy leakage in multi-label image recognition has
been overlooked by existing studies. To fill this
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "69",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Abulaish:2022:DSK,
author = "Muhammad Abulaish and Mohd Fazil and Mohammed J.
Zaki",
title = "Domain-Specific Keyword Extraction Using Joint
Modeling of Local and Global Contextual Semantics",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "70:1--70:30",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494560",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494560",
abstract = "Domain-specific keyword extraction is a vital task in
the field of text mining. There are various research
tasks, such as spam e-mail classification, abusive
language detection, sentiment analysis, and emotion
mining, where a set of domain-specific \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "70",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yuan:2022:AMS,
author = "Mu Yuan and Lan Zhang and Xiang-Yang Li and Lin-Zhuo
Yang and Hui Xiong",
title = "Adaptive Model Scheduling for Resource-efficient Data
Labeling",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "71:1--71:22",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494559",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494559",
abstract = "Labeling data (e.g., labeling the people, objects,
actions, and scene in images) comprehensively and
efficiently is a widely needed but challenging task.
Numerous models were proposed to label various data and
many approaches were designed to enhance the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "71",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:DEL,
author = "Min-Ling Zhang and Jing-Han Wu and Wei-Xuan Bao",
title = "Disambiguation Enabled Linear Discriminant Analysis
for Partial Label Dimensionality Reduction",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "72:1--72:18",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494565",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494565",
abstract = "As an emerging weakly supervised learning framework,
partial label learning considers inaccurate supervision
where each training example is associated with multiple
candidate labels among which only one is valid. In this
article, a first attempt toward \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "72",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2022:AMP,
author = "Chenji Huang and Yixiang Fang and Xuemin Lin and Xin
Cao and Wenjie Zhang",
title = "{ABLE}: Meta-Path Prediction in Heterogeneous
Information Networks",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "73:1--73:21",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494558",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494558",
abstract = "Given a heterogeneous information network (HIN) $H$, a
head node $h$, a meta-path $P$, and a tail node $t$, the
meta-path prediction aims at predicting whether $h$ can
be linked to $t$ by an instance of $P$. Most existing
solutions either require predefined meta-paths,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "73",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yuan:2022:AIC,
author = "Junkun Yuan and Anpeng Wu and Kun Kuang and Bo Li and
Runze Wu and Fei Wu and Lanfen Lin",
title = "{Auto IV}: Counterfactual Prediction via Automatic
Instrumental Variable Decomposition",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "74:1--74:20",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494568",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494568",
abstract = "Instrumental variables (IVs), sources of treatment
randomization that are conditionally independent of the
outcome, play an important role in causal inference
with unobserved confounders. However, the existing
IV-based counterfactual prediction methods \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "74",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bhatia:2022:RTA,
author = "Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji
Yoon and Kijung Shin and Christos Faloutsos",
title = "Real-Time Anomaly Detection in Edge Streams",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "75:1--75:22",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494564",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494564",
abstract = "Given a stream of graph edges from a dynamic graph,
how can we assign anomaly scores to edges in an online
manner, for the purpose of detecting unusual behavior,
using constant time and memory? Existing approaches aim
to detect individually surprising \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "75",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sheshbolouki:2022:SBA,
author = "Aida Sheshbolouki and M. Tamer {\"O}zsu",
title = "{sGrapp}: Butterfly Approximation in Streaming
Graphs",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "76:1--76:43",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3495011",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3495011",
abstract = "We study the fundamental problem of butterfly (i.e.,
(2,2)-bicliques) counting in bipartite streaming
graphs. Similar to triangles in unipartite graphs,
enumerating butterflies is crucial in understanding the
structure of bipartite graphs. This benefits \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "76",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2022:MGL,
author = "Hanrui Wu and Michael K. Ng",
title = "Multiple Graphs and Low-Rank Embedding for
Multi-Source Heterogeneous Domain Adaptation",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "77:1--77:25",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3492804",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3492804",
abstract = "Multi-source domain adaptation is a challenging topic
in transfer learning, especially when the data of each
domain are represented by different kinds of features,
i.e., Multi-source Heterogeneous Domain Adaptation
(MHDA). It is important to take \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "77",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Prokhorenkova:2022:WLM,
author = "Liudmila Prokhorenkova and Alexey Tikhonov and Nelly
Litvak",
title = "When Less Is More: Systematic Analysis of
Cascade-Based Community Detection",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "78:1--78:22",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494563",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494563",
abstract = "Information diffusion, spreading of infectious
diseases, and spreading of rumors are fundamental
processes occurring in real-life networks. In many
practical cases, one can observe when nodes become
infected, but the underlying network, over which a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "78",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2022:DTA,
author = "Xu Yang and Chao Song and Mengdi Yu and Jiqing Gu and
Ming Liu",
title = "Distributed Triangle Approximately Counting Algorithms
in Simple Graph Stream",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "79:1--79:43",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494562",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494562",
abstract = "Recently, the counting algorithm of local topology
structures, such as triangles, has been widely used in
social network analysis, recommendation systems, user
portraits and other fields. At present, the problem of
counting global and local triangles in a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "79",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2022:HCN,
author = "Hanrui Wu and Michael K. Ng",
title = "Hypergraph Convolution on Nodes-Hyperedges Network for
Semi-Supervised Node Classification",
journal = j-TKDD,
volume = "16",
number = "4",
pages = "80:1--80:19",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494567",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Fri Jan 14 06:33:11 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494567",
abstract = "Hypergraphs have shown great power in representing
high-order relations among entities, and lots of
hypergraph-based deep learning methods have been
proposed to learn informative data representations for
the node classification problem. However, most of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "80",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shen:2022:MTM,
author = "Yanyan Shen and Baoyuan Ou and Ranzhen Li",
title = "{MBN}: Towards Multi-Behavior Sequence Modeling for
Next Basket Recommendation",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "81:1--81:23",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3497748",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3497748",
abstract = "Next basket recommendation aims at predicting the next
set of items that a user would likely purchase
together, which plays an important role in e-commerce
platforms. Unlike conventional item recommendation, the
next basket recommendation focuses on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "81",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ke:2022:MRG,
author = "Xiangyu Ke and Arijit Khan and Francesco Bonchi",
title = "Multi-relation Graph Summarization",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "82:1--82:30",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494561",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494561",
abstract = "Graph summarization is beneficial in a wide range of
applications, such as visualization, interactive and
exploratory analysis, approximate query processing,
reducing the on-disk storage footprint, and graph
processing in modern hardware. However, the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "82",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2022:OLB,
author = "Weirong Chen and Jiaqi Zheng and Haoyu Yu and Guihai
Chen and Yixin Chen and Dongsheng Li",
title = "Online Learning Bipartite Matching with Non-stationary
Distributions",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "83:1--83:22",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502734",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502734",
abstract = "Online bipartite matching has attracted wide interest
since it can successfully model the popular online
car-hailing problem and sharing economy. Existing works
consider this problem under either adversary setting or
i.i.d.\ setting. The former is too \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "83",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ihou:2022:SVO,
author = "Koffi Eddy Ihou and Manar Amayri and Nizar Bouguila",
title = "Stochastic Variational Optimization of a Hierarchical
{Dirichlet} Process Latent Beta-{Liouville} Topic
Model",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "84:1--84:48",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502727",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502727",
abstract = "In topic models, collections are organized as
documents where they arise as mixtures over latent
clusters called topics. A topic is a distribution over
the vocabulary. In large-scale applications, parametric
or finite topic mixture models such as LDA \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "84",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Davvetas:2022:ETL,
author = "Athanasios Davvetas and Iraklis A. Klampanos and
Spiros Skiadopoulos and Vangelis Karkaletsis",
title = "Evidence Transfer: Learning Improved Representations
According to External Heterogeneous Task Outcomes",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "85:1--85:22",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502732",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502732",
abstract = "Unsupervised representation learning tends to produce
generic and reusable latent representations. However,
these representations can often miss high-level
features or semantic information, since they only
observe the implicit properties of the dataset.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "85",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Varde:2022:CES,
author = "Aparna S. Varde",
title = "Computational Estimation by Scientific Data Mining
with Classical Methods to Automate Learning Strategies
of Scientists",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "86:1--86:52",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502736",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502736",
abstract = "Experimental results are often plotted as
2-dimensional graphical plots (aka graphs) in
scientific domains depicting dependent versus
independent variables to aid visual analysis of
processes. Repeatedly performing laboratory experiments
consumes \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "86",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhou:2022:OSS,
author = "Peng Zhou and Shu Zhao and Yuanting Yan and Xindong
Wu",
title = "Online Scalable Streaming Feature Selection via
Dynamic Decision",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "87:1--87:20",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502737",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502737",
abstract = "Feature selection is one of the core concepts in
machine learning, which hugely impacts the model's
performance. For some real-world applications, features
may exist in a stream mode that arrives one by one over
time, while we cannot know the exact number \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "87",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Pooja:2022:EHO,
author = "Km Pooja and Samrat Mondal and Joydeep Chandra",
title = "Exploiting Higher Order Multi-dimensional
Relationships with Self-attention for Author Name
Disambiguation",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "88:1--88:23",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502730",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502730",
abstract = "Name ambiguity is a prevalent problem in scholarly
publications due to the unprecedented growth of digital
libraries and number of researchers. An author is
identified by their name in the absence of a unique
identifier. The documents of an author are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "88",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Pei:2022:BHB,
author = "Shuyu Pei and Kun Xie and Xin Wang and Gaogang Xie and
Kenli Li and Wei Li and Yanbiao Li and Jigang Wen",
title = "{B$_h$BF}: a {Bloom} Filter Using {B$_h$} Sequences
for Multi-set Membership Query",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "89:1--89:26",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502735",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502735",
abstract = "Multi-set membership query is a fundamental issue for
network functions such as packet processing and state
machines monitoring. Given the rigid query speed and
memory requirements, it would be promising if a
multi-set query algorithm can be designed \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "89",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2022:DED,
author = "Ling Chen and Hongyu Shi",
title = "{DexDeepFM}: Ensemble Diversity Enhanced Extreme Deep
Factorization Machine Model",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "90:1--90:17",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3505272",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3505272",
abstract = "Predicting user positive response (e.g., purchases and
clicks) probability is a critical task in Web
applications. To identify predictive features from raw
data, the state-of-the-art extreme deep factorization
machine model (xDeepFM) introduces a new \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "90",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Singh:2022:MLD,
author = "Shikha Singh and Emilie Chouzenoux and Giovanni
Chierchia and Angshul Majumdar",
title = "Multi-label Deep Convolutional Transform Learning for
Non-intrusive Load Monitoring",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "91:1--91:6",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502729",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502729",
abstract = "The objective of this letter is to propose a novel
computational method to learn the state of an appliance
(ON / OFF) given the aggregate power consumption
recorded by the smart-meter. We formulate a multi-label
classification problem where the classes \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "91",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sato:2022:CTG,
author = "Ryoma Sato and Makoto Yamada and Hisashi Kashima",
title = "Constant Time Graph Neural Networks",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "92:1--92:31",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502733",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502733",
abstract = "The recent advancements in graph neural networks
(GNNs) have led to state-of-the-art performances in
various applications, including chemo-informatics,
question-answering systems, and recommender systems.
However, scaling up these methods to huge graphs,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "92",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ling:2022:PAP,
author = "Zhaolong Ling and Kui Yu and Lin Liu and Jiuyong Li
and Yiwen Zhang and Xindong Wu",
title = "{PSL}: an Algorithm for Partial {Bayesian} Network
Structure Learning",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "93:1--93:25",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3508071",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3508071",
abstract = "Learning partial Bayesian network (BN) structure is an
interesting and challenging problem. In this challenge,
it is computationally expensive to use global BN
structure learning algorithms, while only one part of a
BN structure is interesting, local BN \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "93",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sharma:2022:IDA,
author = "Promila Sharma and Uma Meena and Girish Kumar
Sharma",
title = "Intelligent Data Analysis using Optimized Support
Vector Machine Based Data Mining Approach for Tourism
Industry",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "94:1--94:20",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494566",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3494566",
abstract = "Data analysis involves the deployment of sophisticated
approaches from data mining methods, information
theory, and artificial intelligence in various fields
like tourism, hospitality, and so on for the extraction
of knowledge from the gathered and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "94",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huai:2022:RML,
author = "Mengdi Huai and Tianhang Zheng and Chenglin Miao and
Liuyi Yao and Aidong Zhang",
title = "On the Robustness of Metric Learning: an Adversarial
Perspective",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "95:1--95:25",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502726",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502726",
abstract = "Metric learning aims at automatically learning a
distance metric from data so that the precise
similarity between data instances can be faithfully
reflected, and its importance has long been recognized
in many fields. An implicit assumption in existing
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "95",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Qiu:2022:GNN,
author = "Zhaopeng Qiu and Yunfan Hu and Xian Wu",
title = "Graph Neural News Recommendation with User Existing
and Potential Interest Modeling",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "96:1--96:17",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3511708",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3511708",
abstract = "Personalized news recommendations can alleviate the
information overload problem. To enable personalized
recommendation, one critical step is to learn a
comprehensive user representation to model her/his
interests. Many existing works learn user \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "96",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Goel:2022:QIP,
author = "Kanika Goel and Sander J. J. Leemans and Niels Martin
and Moe T. Wynn",
title = "Quality-Informed Process Mining: a Case for
Standardised Data Quality Annotations",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "97:1--97:47",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3511707",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3511707",
abstract = "Real-life event logs, reflecting the actual executions
of complex business processes, are faced with numerous
data quality issues. Extensive data sanity checks and
pre-processing are usually needed before historical
data can be used as input to obtain \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "97",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2022:WWW,
author = "Hao Liu and Qingyu Guo and Hengshu Zhu and Fuzhen
Zhuang and Shenwen Yang and Dejing Dou and Hui Xiong",
title = "Who will Win the Data Science Competition? Insights
from {KDD Cup 2019} and Beyond",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "98:1--98:24",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3511896",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3511896",
abstract = "Data science competitions are becoming increasingly
popular for enterprises collecting advanced innovative
solutions and allowing contestants to sharpen their
data science skills. Most existing studies about data
science competitions have a focus on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "98",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Oliveira:2022:AMT,
author = "Saullo H. G. Oliveira and Andr{\'e} R. Gon{\c{c}}alves
and Fernando J. {Von Zuben}",
title = "Asymmetric Multi-Task Learning with Local
Transference",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "99:1--99:30",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3514252",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3514252",
abstract = "In this article, we present the Group Asymmetric
Multi-Task Learning (GAMTL) algorithm that
automatically learns from data how tasks transfer
information among themselves at the level of a subset
of features. In practice, for each group of features
GAMTL \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "99",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhong:2022:CFC,
author = "Sheng Zhong and Vinicius M. A. Souza and Abdullah
Mueen",
title = "Combining Filtering and Cross-Correlation Efficiently
for Streaming Time Series",
journal = j-TKDD,
volume = "16",
number = "5",
pages = "100:1--100:24",
month = oct,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502738",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed May 25 07:43:38 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502738",
abstract = "Monitoring systems have hundreds or thousands of
distributed sensors gathering and transmitting
real-time streaming data. The early detection of events
in these systems, such as an earthquake in a seismic
monitoring system, is the base for essential tasks
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "100",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jirina:2022:DFO,
author = "Marcel Jirina and Said Krayem",
title = "The Distance Function Optimization for the Near
Neighbors-Based Classifiers",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "101:1--101:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434769",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3434769",
abstract = "Based on the analysis of conditions for a good
distance function we found four rules that should be
fulfilled. Then, we introduce two new distance
functions, a metric and a pseudometric one. We have
tested how they fit for distance-based classifiers,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "101",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tey:2022:MLB,
author = "Fu Jie Tey and Tin-Yu Wu and Jiann-Liang Chen",
title = "Machine Learning-based Short-term Rainfall Prediction
from Sky Data",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "102:1--102:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502731",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502731",
abstract = "To predict rainfall, our proposed model architecture
combines the Convolutional Neural Network (CNN), which
uses the ResNet-152 pre-training model, with the
Recurrent Neural Network (RNN), which uses the Long
Short-term Memory Network (LSTM) layer, for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "102",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Mahmoud:2022:MOL,
author = "Reem A. Mahmoud and Hazem Hajj",
title = "Multi-objective Learning to Overcome Catastrophic
Forgetting in Time-series Applications",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "103:1--103:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3502728",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3502728",
abstract = "One key objective of artificial intelligence involves
the continuous adaptation of machine learning models to
new tasks. This branch of continual learning is also
referred to as lifelong learning (LL), where a major
challenge is to minimize catastrophic \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "103",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:GES,
author = "Zhaobo Wang and Yanmin Zhu and Qiaomei Zhang and
Haobing Liu and Chunyang Wang and Tong Liu",
title = "Graph-Enhanced Spatial-Temporal Network for Next {POI}
Recommendation",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "104:1--104:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3513092",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3513092",
abstract = "The task of next Point-of-Interest (POI)
recommendation aims at recommending a list of POIs for
a user to visit at the next timestamp based on his/her
previous interactions, which is valuable for both
location-based service providers and users. Recent
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "104",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tipirneni:2022:SST,
author = "Sindhu Tipirneni and Chandan K. Reddy",
title = "Self-Supervised Transformer for Sparse and Irregularly
Sampled Multivariate Clinical Time-Series",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "105:1--105:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3516367",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3516367",
abstract = "Multivariate time-series data are frequently observed
in critical care settings and are typically
characterized by sparsity (missing information) and
irregular time intervals. Existing approaches for
learning representations in this domain handle these
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "105",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gu:2022:IFS,
author = "Shilin Gu and Yuhua Qian and Chenping Hou",
title = "Incremental Feature Spaces Learning with Label
Scarcity",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "106:1--106:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3516368",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3516368",
abstract = "Recently, learning and mining from data streams with
incremental feature spaces have attracted extensive
attention, where data may dynamically expand over time
in both volume and feature dimensions. Existing
approaches usually assume that the incoming \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "106",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2022:DME,
author = "Zhe Li and Chunhua Sun and Chunli Liu and Xiayu Chen
and Meng Wang and Yezheng Liu",
title = "Dual-{MGAN}: an Efficient Approach for Semi-supervised
Outlier Detection with Few Identified Anomalies",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "107:1--107:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522690",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522690",
abstract = "Outlier detection is an important task in data mining,
and many technologies for it have been explored in
various applications. However, owing to the default
assumption that outliers are not concentrated,
unsupervised outlier detection may not correctly
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "107",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:NNE,
author = "Yu Wang and Hanghang Tong and Ziye Zhu and Yun Li",
title = "Nested Named Entity Recognition: a Survey",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "108:1--108:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522593",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522593",
abstract = "With the rapid development of text mining, many
studies observe that text generally contains a variety
of implicit information, and it is important to develop
techniques for extracting such information. Named
Entity Recognition (NER), the first step of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "108",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xiao:2022:TQI,
author = "Houping Xiao and Shiyu Wang",
title = "Toward Quality of Information Aware Distributed
Machine Learning",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "109:1--109:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522591",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522591",
abstract = "In the era of big data, data are usually distributed
across numerous connected computing and storage units
(i.e., nodes or workers). Under such an environment,
many machine learning problems can be reformulated as a
consensus optimization problem, which \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "109",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cai:2022:ANI,
author = "Jianghui Cai and Yuqing Yang and Haifeng Yang and
Xujun Zhao and Jing Hao",
title = "{ARIS}: a Noise Insensitive Data Pre-Processing Scheme
for Data Reduction Using Influence Space",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "110:1--110:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522592",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522592",
abstract = "The extensive growth of data quantity has posed many
challenges to data analysis and retrieval. Noise and
redundancy are typical representatives of the
above-mentioned challenges, which may reduce the
reliability of analysis and retrieval results and
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "110",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2022:IBL,
author = "Xiaoying Zhang and Hong Xie and John C. S. Lui",
title = "Improving Bandit Learning Via Heterogeneous
Information Networks: Algorithms and Applications",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "111:1--111:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522590",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522590",
abstract = "Contextual bandit serves as an invaluable tool to
balance the exploration vs. exploitation tradeoff in
various applications such as online recommendation. In
many applications, heterogeneous information networks
(HINs) provide rich side information for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "111",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Desantis:2022:FBM,
author = "Derek DeSantis and Erik Skau and Duc P. Truong and
Boian Alexandrov",
title = "Factorization of Binary Matrices: Rank Relations,
Uniqueness and Model Selection of {Boolean}
Decomposition",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "112:1--112:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522594",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522594",
abstract = "The application of binary matrices are numerous.
Representing a matrix as a mixture of a small
collection of latent vectors via low-rank decomposition
is often seen as an advantageous method to interpret
and analyze data. In this work, we examine the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "112",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2022:SWT,
author = "Xing Yang and Chen Zhang and Baihua Zheng",
title = "Segment-Wise Time-Varying Dynamic {Bayesian} Network
with Graph Regularization",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "113:1--113:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522589",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522589",
abstract = "Time-varying dynamic Bayesian network (TVDBN) is
essential for describing time-evolving directed
conditional dependence structures in complex
multivariate systems. In this article, we construct a
TVDBN model, together with a score-based method for its
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "113",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2022:CSD,
author = "Shaowei Wang and Lingling Zhang and Xuan Luo and Yi
Yang and Xin Hu and Tao Qin and Jun Liu",
title = "Computer Science Diagram Understanding with Topology
Parsing",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "114:1--114:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522689",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522689",
abstract = "Diagram is a special form of visual expression for
representing complex concepts, logic, and knowledge,
which widely appears in educational scenes such as
textbooks, blogs, and encyclopedias. Current research
on diagrams preliminarily focuses on natural \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "114",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jang:2022:FKS,
author = "Jun-Gi Jang and Chaeheum Park and Changwon Jang and
Geonsoo Kim and U. Kang",
title = "Finding Key Structures in {MMORPG} Graph with
Hierarchical Graph Summarization",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "115:1--115:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3522691",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3522691",
abstract = "What are the key structures existing in a large
real-world MMORPG (Massively Multiplayer Online
Role-Playing Game) graph? How can we compactly
summarize an MMORPG graph with hierarchical node
labels, considering substructures at different levels
of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "115",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhu:2022:PPN,
author = "Nengjun Zhu and Jian Cao and Xinjiang Lu and Chuanren
Liu and Hao Liu and Yanyan Li and Xiangfeng Luo and Hui
Xiong",
title = "Predicting a Person's Next Activity Region with a
Dynamic Region-Relation-Aware Graph Neural Network",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "116:1--116:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3529091",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3529091",
abstract = "The understanding of people's inter-regional mobility
behaviors, such as predicting the next activity region
(AR) or uncovering the intentions for regional
mobility, is of great value to public administration or
business interests. While there are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "116",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xiong:2022:GDL,
author = "Haoyi Xiong and Ruosi Wan and Jian Zhao and Zeyu Chen
and Xingjian Li and Zhanxing Zhu and Jun Huan",
title = "{GrOD}: Deep Learning with Gradients Orthogonal
Decomposition for Knowledge Transfer, Distillation, and
Adversarial Training",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "117:1--117:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3530836",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3530836",
abstract = "Regularization that incorporates the linear
combination of empirical loss and explicit
regularization terms as the loss function has been
frequently used for many machine learning tasks. The
explicit regularization term is designed in different
types, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "117",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Benarous:2022:SLH,
author = "Maya Benarous and Eran Toch and Irad Ben-Gal",
title = "Synthesis of Longitudinal Human Location Sequences:
Balancing Utility and Privacy",
journal = j-TKDD,
volume = "16",
number = "6",
pages = "118:1--118:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3529260",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Wed Nov 16 08:20:02 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3529260",
abstract = "People's location data are continuously tracked from
various devices and sensors, enabling an ongoing
analysis of sensitive information that can violate
people's privacy and reveal confidential information.
Synthetic data have been used to generate \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "118",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 119.
@Article{Coscia:2022:GEM,
  author          = "Michele Coscia",
  title           = "Generalized {Euclidean} Measure to Estimate Distances
                     on Multilayer Networks",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "119:1--119:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3529396",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3529396",
  abstract        = "Estimating the distance covered by a spreading event
                     on a network can lead to a better understanding of
                     epidemics, economic growth, and human behavior. There
                     are many methods solving this problem---which has been
                     called Node Vector Distance (NVD)---for single
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "119",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 120.
@Article{Yildiz:2022:SRR,
  author          = "Ilkay Yildiz and Jennifer Dy and Deniz Erdogmus and
                     Susan Ostmo and J. Peter Campbell and Michael F. Chiang
                     and Stratis Ioannidis",
  title           = "Spectral Ranking Regression",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "120:1--120:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3530693",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3530693",
  abstract        = "We study the problem of ranking regression, in which a
                     dataset of rankings is used to learn Plackett-Luce
                     scores as functions of sample features. We propose a
                     novel spectral algorithm to accelerate learning in
                     ranking regression. Our main technical \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "120",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 121.
@Article{Li:2022:PDB,
  author          = "Ziyue Li and Hao Yan and Fugee Tsung and Ke Zhang",
  title           = "Profile Decomposition Based Hybrid Transfer Learning
                     for Cold-Start Data Anomaly Detection",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "121:1--121:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3530990",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3530990",
  abstract        = "Anomaly detection is an essential task for quality
                     management in smart manufacturing. An accurate
                     data-driven detection method usually needs enough data
                     and labels. However, in practice, there commonly exist
                     newly set-up processes in manufacturing, and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "121",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 122.
@Article{Hu:2022:SDP,
  author          = "Yue Hu and Ao Qu and Yanbing Wang and Daniel B. Work",
  title           = "Streaming Data Preprocessing via Online Tensor
                     Recovery for Large Environmental Sensor Networks",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "122:1--122:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532189",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532189",
  abstract        = "Measuring the built and natural environment at a
                     fine-grained scale is now possible with low-cost urban
                     environmental sensor networks. However, fine-grained
                     city-scale data analysis is complicated by tedious data
                     cleaning including removing outliers and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "122",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 123.
@Article{Ma:2022:HFD,
  author          = "Pengfei Ma and Youxi Wu and Yan Li and Lei Guo and He
                     Jiang and Xingquan Zhu and Xindong Wu",
  title           = "{HW-Forest}: Deep Forest with Hashing Screening and
                     Window Screening",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "123:1--123:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532193",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532193",
  abstract        = "As a novel deep learning model, gcForest has been
                     widely used in various applications. However, current
                     multi-grained scanning of gcForest produces many
                     redundant feature vectors, and this increases the time
                     cost of the model. To screen out redundant \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "123",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 124.
@Article{Pellegrina:2022:MMC,
  author          = "Leonardo Pellegrina and Cyrus Cousins and Fabio Vandin
                     and Matteo Riondato",
  title           = "{MCRapper}: {Monte-Carlo Rademacher} Averages for
                     Poset Families and Approximate Pattern Mining",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "124:1--124:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532187",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532187",
  abstract        = "``I'm an MC still as honest'' --- Eminem, {\em Rap
                     God}. We present MCRapper, an algorithm for efficient
                     computation of Monte-Carlo Empirical Rademacher
                     Averages (MCERA) for families of functions exhibiting
                     poset (e.g., lattice) structure, such as those that
                     arise in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "124",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 16(6), December 2022, article 125.
@Article{Bechini:2022:NBF,
  author          = "Alessio Bechini and Alessandro Bondielli and Jos{\'e}
                     Luis Corcuera B{\'a}rcena and Pietro Ducange and
                     Francesco Marcelloni and Alessandro Renda",
  title           = "A News-Based Framework for Uncovering and Tracking
                     City Area Profiles: Assessment in {Covid-19} Setting",
  journal         = j-TKDD,
  volume          = "16",
  number          = "6",
  pages           = "125:1--125:??",
  month           = dec,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532186",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Wed Nov 16 08:20:02 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532186",
  abstract        = "In the last years, there has been an ever-increasing
                     interest in profiling various aspects of city life,
                     especially in the context of smart cities. This
                     interest has become even more relevant recently when we
                     have realized how dramatic events, such as the
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "125",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 1.
@Article{Wang:2023:MCS,
  author          = "Ting-Yun Wang and Chiao-Ting Chen and Ju-Chun Huang
                     and Szu-Hao Huang",
  title           = "Modeling Cross-session Information with Multi-interest
                     Graph Neural Networks for the Next-item
                     Recommendation",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "1:1--1:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532192",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532192",
  abstract        = "Next-item recommendation involves predicting the next
                     item of interest of a given user from their past
                     behavior. Users tend to browse and purchase various
                     items on e-commerce websites according to their varied
                     interests and needs, as reflected in their \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "1",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 2.
@Article{Wang:2023:SRM,
  author          = "Yu Wang and Chuan Chen and Jinrong Lai and Lele Fu and
                     Yuren Zhou and Zibin Zheng",
  title           = "A Self-Representation Method with Local Similarity
                     Preserving for Fast Multi-View Outlier Detection",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "2:1--2:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532191",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532191",
  abstract        = "With the rapidly growing attention to multi-view data
                     in recent years, multi-view outlier detection has
                     become a rising field with intense research. These
                     researches have made some success, but still exist some
                     issues that need to be solved. First, many \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "2",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 3.
@Article{Wang:2023:LAA,
  author          = "Ke Wang and Yanmin Zhu and Haobing Liu and Tianzi Zang
                     and Chunyang Wang",
  title           = "Learning Aspect-Aware High-Order Representations from
                     Ratings and Reviews for Recommendation",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "3:1--3:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532188",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532188",
  abstract        = "Textual reviews contain rich semantic information that
                     is useful for making better recommendation, as such
                     semantic information may indicate more fine-grained
                     preferences of users. Recent efforts make considerable
                     improvement on recommendation by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "3",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 4.
@Article{Zhang:2023:IGL,
  author          = "Zan Zhang and Lin Liu and Jiuyong Li and Xindong Wu",
  title           = "Integrating Global and Local Feature Selection for
                     Multi-Label Learning",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "4:1--4:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532190",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532190",
  abstract        = "Multi-label learning deals with the problem where an
                     instance is associated with multiple labels
                     simultaneously. Multi-label data is often of high
                     dimensionality and has many noisy, irrelevant, and
                     redundant features. As an important machine learning
                     task, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "4",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 5.
@Article{Chen:2023:EAM,
  author          = "Xinye Chen and Stefan G{\"u}ttel",
  title           = "An Efficient Aggregation Method for the Symbolic
                     Representation of Temporal Data",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "5:1--5:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532622",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532622",
  abstract        = "Symbolic representations are a useful tool for the
                     dimension reduction of temporal data, allowing for the
                     efficient storage of and information retrieval from
                     time series. They can also enhance the training of
                     machine learning algorithms on time series \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "5",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 6.
@Article{Li:2023:SSG,
  author          = "Lei Li and Mengjiao Yan and Zhenchao Tao and Huanhuan
                     Chen and Xindong Wu",
  title           = "Semi-Supervised Graph Pattern Matching and Rematching
                     for Expert Community Location",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "6:1--6:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532623",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532623",
  abstract        = "Graph pattern matching (GPM) is widely used in social
                     network analysis, such as expert finding, social group
                     query, and social position detection. Technically, GPM
                     is to find matched subgraphs that meet the requirements
                     of pattern graphs in big social \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "6",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 7.
@Article{Gu:2023:IMC,
  author          = "Zhibin Gu and Songhe Feng",
  title           = "Individuality Meets Commonality: A Unified Graph
                     Learning Framework for Multi-View Clustering",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "7:1--7:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532612",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532612",
  abstract        = "Multi-view clustering, which aims at boosting the
                     clustering performance by leveraging the individual
                     information and the common information of multi-view
                     data, has gained extensive consideration in recent
                     years. However, most existing multi-view \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "7",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 8.
@Article{Chakraborty:2023:SAM,
  author          = "Roshni Chakraborty and Ritwika Das and Joydeep
                     Chandra",
  title           = "{SigGAN}: Adversarial Model for Learning Signed
                     Relationships in Networks",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "8:1--8:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532610",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532610",
  abstract        = "Signed link prediction in graphs is an important
                     problem that has applications in diverse domains. It is
                     a binary classification problem that predicts whether
                     an edge between a pair of nodes is positive or
                     negative. Existing approaches for link prediction
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "8",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 9.
@Article{Li:2023:DGC,
  author          = "Fuxian Li and Jie Feng and Huan Yan and Guangyin Jin
                     and Fan Yang and Funing Sun and Depeng Jin and Yong
                     Li",
  title           = "Dynamic Graph Convolutional Recurrent Network for
                     Traffic Prediction: Benchmark and Solution",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "9:1--9:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532611",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532611",
  abstract        = "Traffic prediction is the cornerstone of intelligent
                     transportation system. Accurate traffic forecasting is
                     essential for the applications of smart cities, i.e.,
                     intelligent traffic management and urban planning.
                     Although various methods are proposed for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "9",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 10.
@Article{Huang:2023:URD,
  author          = "Gengsen Huang and Wensheng Gan and Jian Weng and
                     Philip S. Yu",
  title           = "{US-Rule}: Discovering Utility-driven Sequential
                     Rules",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "10:1--10:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532613",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532613",
  abstract        = "Utility-driven mining is an important task in data
                     science and has many applications in real life.
                     High-utility sequential pattern mining (HUSPM) is one
                     kind of utility-driven mining. It aims at discovering
                     all sequential patterns with high utility. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "10",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 11.
@Article{Wang:2023:MCR,
  author          = "Jiapu Wang and Boyue Wang and Junbin Gao and Yongli Hu
                     and Baocai Yin",
  title           = "Multi-Concept Representation Learning for Knowledge
                     Graph Completion",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "11:1--11:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3533017",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3533017",
  abstract        = "Knowledge Graph Completion (KGC) aims at inferring
                     missing entities or relations by embedding them in a
                     low-dimensional space. However, most existing KGC
                     methods generally fail to handle the complex concepts
                     hidden in triplets, so the learned embeddings
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "11",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 12.
@Article{Kirielle:2023:UGB,
  author          = "Nishadi Kirielle and Peter Christen and Thilina
                     Ranbaduge",
  title           = "Unsupervised Graph-Based Entity Resolution for Complex
                     Entities",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "12:1--12:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3533016",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3533016",
  abstract        = "Entity resolution (ER) is the process of linking
                     records that refer to the same entity. Traditionally,
                     this process compares attribute values of records to
                     calculate similarities and then classifies pairs of
                     records as referring to the same entity or not
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "12",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 13.
@Article{Kwon:2023:EBM,
  author          = "Soonki Kwon and Younghoon Lee",
  title           = "Explainability-Based Mix-Up Approach for Text Data
                     Augmentation",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "13:1--13:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3533048",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3533048",
  abstract        = "Text augmentation is a strategy for increasing the
                     diversity of training examples without explicitly
                     collecting new data. Owing to the efficiency and
                     effectiveness of text augmentation, numerous
                     augmentation methodologies have been proposed. Among
                     them, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "13",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 14.
@Article{Li:2023:CBS,
  author          = "Qian Li and Xiangmeng Wang and Zhichao Wang and
                     Guandong Xu",
  title           = "Be Causal: De-Biasing Social Network Confounding in
                     Recommendation",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "14:1--14:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3533725",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3533725",
  abstract        = "In recommendation systems, the existence of the
                     missing-not-at-random (MNAR) problem results in the
                     selection bias issue, degrading the recommendation
                     performance ultimately. A common practice to address
                     MNAR is to treat missing entries from the so-called
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "14",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(1), January 2023, article 15.
@Article{Luo:2023:SDH,
  author          = "Xiao Luo and Haixin Wang and Daqing Wu and Chong Chen
                     and Minghua Deng and Jianqiang Huang and Xian-Sheng
                     Hua",
  title           = "A Survey on Deep Hashing Methods",
  journal         = j-TKDD,
  volume          = "17",
  number          = "1",
  pages           = "15:1--15:??",
  month           = jan,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3532624",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Thu Mar 16 07:36:49 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                     https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3532624",
  abstract        = "Nearest neighbor search aims at obtaining the samples
                     in the database with the smallest distances from them
                     to the queries, which is a basic task in a range of
                     fields, including computer vision and data mining.
                     Hashing is one of the most widely used methods for its
                     computational and storage efficiency. With the
                     development of deep learning, deep hashing methods show
                     more advantages than traditional methods. In this
                     survey, we detailedly investigate current deep hashing
                     algorithms including deep supervised hashing and deep
                     unsupervised hashing. Specifically, we categorize deep
                     supervised hashing methods into pairwise methods,
                     ranking-based methods, pointwise methods as well as
                     quantization according to how measuring the
                     similarities of the learned hash codes. Moreover, deep
                     unsupervised hashing is categorized into similarity
                     reconstruction-based methods, pseudo-label-based
                     methods, and prediction-free self-supervised
                     learning-based methods based on their semantic learning
                     manners. We also introduce three related important
                     topics including semi-supervised deep hashing, domain
                     adaption deep hashing, and multi-modal deep hashing.
                     Meanwhile, we present some commonly used public
                     datasets and the scheme to measure the performance of
                     deep hashing algorithms. Finally, we discuss some
                     potential research directions in conclusion.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "15",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 16.
@Article{Nguyen:2023:DAG,
  author          = "Hung T. Nguyen and Pierre J. Liang and Leman Akoglu",
  title           = "Detecting Anomalous Graphs in Labeled Multi-Graph
                     Databases",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "16:1--16:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3533770",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3533770",
  abstract        = "Within a large database G containing graphs with
                     labeled nodes and directed, multi-edges; how can we
                     detect the anomalous graphs? Most existing work are
                     designed for plain (unlabeled) and/or simple
                     (unweighted) graphs. We introduce CODEtect, the first
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "16",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 17.
@Article{Lu:2023:SDI,
  author          = "Xun Lu and Songhe Feng",
  title           = "Structure Diversity-Induced Anchor Graph Fusion for
                     Multi-View Clustering",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "17:1--17:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3534931",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3534931",
  abstract        = "The anchor graph structure has been widely used to
                     speed up large-scale multi-view clustering and
                     exhibited promising performance. How to effectively
                     integrate the anchor graphs on multiple views to
                     achieve enhanced clustering performance still remains a
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "17",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 18.
@Article{Wang:2023:GML,
  author          = "Lichen Wang and Zhengming Ding and Kasey Lee and
                     Seungju Han and Jae-Joon Han and Changkyu Choi and Yun
                     Fu",
  title           = "Generative Multi-Label Correlation Learning",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "18:1--18:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3538708",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3538708",
  abstract        = "In real-world applications, a single instance could
                     have more than one label. To solve this task,
                     multi-label learning methods emerged in recent years.
                     It is a more challenging problem for many reasons, such
                     as complex label correlation, long-tail label
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "18",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 19.
@Article{Lu:2023:DPE,
  author          = "Xun Lu and Songhe Feng and Gengyu Lyu and Yi Jin and
                     Congyan Lang",
  title           = "Distance-Preserving Embedding Adaptive Bipartite Graph
                     Multi-View Learning with Application to Multi-Label
                     Classification",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "19:1--19:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3537900",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3537900",
  abstract        = "Graph-based multi-view learning has attracted much
                     attention due to the efficacy of fusing the information
                     from different views. However, most of them exhibit
                     high computational complexity. We propose an
                     anchor-based bipartite graph embedding approach to
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "19",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 20.
@Article{Wang:2023:CUV,
  author          = "Qianru Wang and Bin Guo and Lu Cheng and Zhiwen Yu and
                     Huan Liu",
  title           = "{CausalSE}: Understanding Varied Spatial Effects with
                     Missing Data Toward Adding New Bike-sharing Stations",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "20:1--20:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3536427",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3536427",
  abstract        = "To meet the growing bike-sharing demands and make
                     people's travel convenient, the companies need to add
                     new stations at locations where demands exceed supply.
                     Before making reliable decisions on adding new
                     stations, it is required to understand the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "20",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 21.
@Article{Jerez:2023:EAD,
  author          = "Carlos Ivan Jerez and Jun Zhang and Marcia R. Silva",
  title           = "On Equivalence of Anomaly Detection Algorithms",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "21:1--21:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3536428",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3536428",
  abstract        = "In most domains, anomaly detection is typically cast
                     as an unsupervised learning problem because of the
                     infeasibility of labeling large datasets. In this
                     setup, the evaluation and comparison of different
                     anomaly detection algorithms is difficult. Although
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "21",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD 17(2), February 2023, article 22.
@Article{Freris:2023:IEV,
  author          = "Nikolaos M. Freris and Ahmad Ajalloeian and Michalis
                     Vlachos",
  title           = "Interpretable Embedding and Visualization of
                     Compressed Data",
  journal         = j-TKDD,
  volume          = "17",
  number          = "2",
  pages           = "22:1--22:??",
  month           = feb,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3537901",
  ISSN            = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L          = "1556-4681",
  bibdate         = "Fri Mar 31 09:53:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3537901",
  abstract        = "Traditional embedding methodologies, also known as
                     dimensionality reduction techniques, assume the
                     availability of exact pairwise distances between the
                     high-dimensional objects that will be embedded in a
                     lower dimensionality. In this article, we propose
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Knowl. Discov. Data",
  articleno       = "22",
  fjournal        = "ACM Transactions on Knowledge Discovery from Data
                     (TKDD)",
  journal-URL     = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:MIF,
  author =       "Shaokang Wang and Li Pan and Yu Wu",
  title =        "Meta-Information Fusion of Hierarchical Semantics
                 Dependency and Graph Structure for Structured Text
                 Classification",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3537971",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3537971",
  abstract =     "Structured text with plentiful hierarchical structure
                 information is an important part in real-world complex
                 texts. Structured text classification is attracting
                 more attention in natural language processing due to
                 the increasing complexity of application \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhu:2023:NGI,
  author =       "Xuliang Zhu and Xin Huang and Longxu Sun and Jiming
                 Liu",
  title =        "A Novel Graph Indexing Approach for Uncovering
                 Potential {COVID-19} Transmission Clusters",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3538492",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3538492",
  abstract =     "The COVID-19 pandemic has caused the society lockdowns
                 and a large number of deaths in many countries.
                 Potential transmission cluster discovery is to find all
                 suspected users with infections, which is greatly
                 needed to fast discover virus transmission \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Atyabi:2023:SCA,
  author =       "Adham Atyabi and Frederick Shic and Jiajun Jiang and
                 Claire E. Foster and Erin Barney and Minah Kim and
                 Beibin Li and Pamela Ventola and Chung Hao Chen",
  title =        "Stratification of Children with Autism Spectrum
                 Disorder Through Fusion of Temporal Information in
                 Eye-gaze Scan-Paths",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "25:1--25:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3539226",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3539226",
  abstract =     "Background: Looking pattern differences are shown to
                 separate individuals with Autism Spectrum Disorder
                 (ASD) and Typically Developing (TD) controls. Recent
                 studies have shown that, in children with ASD, these
                 patterns change with intellectual and social \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "25",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2023:GDF,
  author =       "Hongjie Chen and Ryan A. Rossi and Kanak Mahadik and
                 Sungchul Kim and Hoda Eldardiry",
  title =        "Graph Deep Factors for Probabilistic Time-series
                 Forecasting",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3543511",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3543511",
  abstract =     "Effective time-series forecasting methods are of
                 significant importance to solve a broad spectrum of
                 research problems. Deep probabilistic forecasting
                 techniques have recently been proposed for modeling
                 large collections of time-series. However, these
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "26",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Jha:2023:SCL,
  author =       "Akshita Jha and Vineeth Rakesh and Jaideep
                 Chandrashekar and Adithya Samavedhi and Chandan K.
                 Reddy",
  title =        "Supervised Contrastive Learning for Interpretable
                 Long-Form Document Matching",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "27:1--27:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3542822",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3542822",
  abstract =     "Recent advancements in deep learning techniques have
                 transformed the area of semantic text matching (STM).
                 However, most state-of-the-art models are designed to
                 operate with short documents such as tweets, user
                 reviews, comments, and so on. These models \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Vajiac:2023:DIT,
  author =       "Catalina Vajiac and Meng-Chieh Lee and Aayushi
                 Kulshrestha and Sacha Levy and Namyong Park and Andreas
                 Olligschlaeger and Cara Jones and Reihaneh Rabbany and
                 Christos Faloutsos",
  title =        "{DeltaShield}: Information Theory for
                 Human-Trafficking Detection",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563040",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563040",
  abstract =     "Given a million escort advertisements, how can we spot
                 near-duplicates? Such micro-clusters of ads are usually
                 signals of human trafficking (HT). How can we summarize
                 them to convince law enforcement to act? Spotting
                 micro-clusters of near-duplicate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2023:SHI,
  author =       "Jianhui Sun and Ying Yang and Guangxu Xun and Aidong
                 Zhang",
  title =        "Scheduling Hyperparameters to Improve Generalization:
                 From Centralized {SGD} to Asynchronous {SGD}",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544782",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544782",
  abstract =     "This article studies how to schedule
                 hyperparameters to improve generalization of both
                 centralized single-machine stochastic gradient descent
                 (SGD) and distributed asynchronous SGD (ASGD). SGD
                 augmented with momentum variants (e.g., heavy ball
                 momentum (\ldots{}))",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Miao:2023:DPC,
  author =       "Xiaoye Miao and Huanhuan Peng and Yunjun Gao and
                 Zongfu Zhang and Jianwei Yin",
  title =        "On Dynamically Pricing Crowdsourcing Tasks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "30:1--30:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544018",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544018",
  abstract =     "Crowdsourcing techniques have been extensively
                 explored in the past decade, including task allocation,
                 quality assessment, and so on. Most of professional
                 crowdsourcing platforms adopt the fixed pricing scheme
                 to offer a fixed price for crowd tasks. It is
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "30",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Kamhoua:2023:GGG,
  author =       "Barakeel Fanseu Kamhoua and Lin Zhang and Kaili Ma and
                 James Cheng and Bo Li and Bo Han",
  title =        "{GRACE}: a General Graph Convolution Framework for
                 Attributed Graph Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544977",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544977",
  abstract =     "Attributed graph clustering (AGC) is an important
                 problem in graph mining as more and more complex data
                 in real-world have been represented in graphs with
                 attributed nodes. While it is a common practice to
                 leverage both attribute and structure information
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhai:2023:LGR,
  author =       "Penglong Zhai and Shihua Zhang",
  title =        "Learnable Graph-Regularization for Matrix
                 Decomposition",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544781",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544781",
  abstract =     "Low-rank approximation models of data matrices have
                 become important machine learning and data mining tools
                 in many fields, including computer vision, text mining,
                 bioinformatics, and many others. They allow for
                 embedding high-dimensional data into low-dimensional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2023:RLP,
  author =       "Jinwei Chen and Zefang Zong and Yunlin Zhuang and Huan
                 Yan and Depeng Jin and Yong Li",
  title =        "Reinforcement Learning for Practical Express Systems
                 with Mixed Deliveries and Pickups",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3546952",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3546952",
  abstract =     "In real-world express systems, couriers need to
                 satisfy not only the delivery demands but also the
                 pick-up demands of customers. Delivery and pickup tasks
                 are usually mixed together within integrated routing
                 plans. Such a mixed routing problem can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Feng:2023:CTE,
  author =       "Tao Feng and Sirui Song and Tong Xia and Yong Li",
  title =        "Contact Tracing and Epidemic Intervention via Deep
                 Reinforcement Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3546870",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3546870",
  abstract =     "The recent outbreak of COVID-19 poses a serious threat
                 to people's lives. Epidemic control strategies have
                 also caused damage to the economy by cutting off
                 humans' daily commute. In this article, we develop an
                 Individual-based Reinforcement Learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wan:2023:PMT,
  author =       "Mingyang Wan and Daochen Zha and Ninghao Liu and Na
                 Zou",
  title =        "In-Processing Modeling Techniques for Machine Learning
                 Fairness: a Survey",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551390",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551390",
  abstract =     "Machine learning models are becoming pervasive in
                 high-stakes applications. Despite their clear benefits
                 in terms of performance, the models could show
                 discrimination against minority groups and result in
                 fairness issues in a decision-making process,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Long:2023:MAC,
  author =       "Qiang Long and Adil Bagirov and Sona Taheri and Nargiz
                 Sultanova and Xue Wu",
  title =        "Methods and Applications of Clusterwise Linear
                 Regression: a Survey and Comparison",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550074",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550074",
  abstract =     "Clusterwise linear regression (CLR) is a well-known
                 technique for approximating a data using more than one
                 linear function. It is based on the combination of
                 clustering and multiple linear regression methods. This
                 article provides a comprehensive survey \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2023:OMO,
  author =       "Youxi Wu and Mingjie Chen and Yan Li and Jing Liu and
                 Zhao Li and Jinyan Li and Xindong Wu",
  title =        "{ONP-Miner}: One-off Negative Sequential Pattern
                 Mining",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "37:1--37:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3549940",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3549940",
  abstract =     "Negative sequential pattern mining (SPM) is an
                 important SPM research topic. Unlike positive SPM,
                 negative SPM can discover events that should have
                 occurred but have not occurred, and it can be used for
                 financial risk management and fraud detection.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Carchiolo:2023:ENP,
  author =       "Vincenza Carchiolo and Marco Grassia and Alessandro
                 Longheu and Michele Malgeri and Giuseppe Mangioni",
  title =        "Efficient Node {PageRank} Improvement via
                 Link-building using Geometric Deep Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551642",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551642",
  abstract =     "Centrality is a relevant topic in the field of network
                 research, due to its various theoretical and practical
                 implications. In general, all centrality metrics aim at
                 measuring the importance of nodes (according to some
                 definition of importance), and such importance scores
                 are used to rank the nodes in the network, therefore
                 the rank improvement is a strictly related topic. In a
                 given network, the rank improvement is achieved by
                 establishing new links, therefore the question shifts
                 to which and how many links should be collected to get
                 a desired rank. This problem, also known as
                 link-building has been shown to be NP-hard, and most
                 heuristics developed failed in obtaining good
                 performance with acceptable computational complexity.
                 In this article, we present LB--GDM, a novel approach
                 that leverages Geometric Deep Learning to tackle the
                 link-building problem. To validate our proposal, 31
                 real-world networks were considered; tests show that
                 LB--GDM performs significantly better than the
                 state-of-the-art heuristics, while having a comparable
                 or even lower computational complexity, which allows it
                 to scale well even to large networks. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2023:LLM,
  author =       "Linli Jiang and Chao-Xiong Chen and Chao Chen",
  title =        "{L2MM}: Learning to Map Matching with Deep Models for
                 Low-Quality {GPS} Trajectory Data",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "39:1--39:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550486",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550486",
  abstract =     "Map matching is a fundamental research topic with the
                 objective of aligning GPS trajectories to paths on the
                 road network. However, existing models fail to achieve
                 satisfactory performance for low-quality (i.e., noisy,
                 low-frequency, and non-uniform) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2023:EIS,
  author =       "Yihong Zhang and Takahiro Hara",
  title =        "Explainable Integration of Social Media Background in
                 a Dynamic Neural Recommender",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "40:1--40:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550279",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550279",
  abstract =     "Recommender systems nowadays are commonly deployed in
                 e-commerce platforms to help customers making purchase
                 decisions. Dynamic recommender considers not only
                 static user-item interaction data, but the temporal
                 information at the time of recommendation. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:MRB,
  author =       "Yashen Wang and Zhaoyu Wang and Huanhuan Zhang and
                 Zhirun Liu",
  title =        "Microblog Retrieval Based on Concept-Enhanced
                 Pre-Training Model",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "41:1--41:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3552311",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3552311",
  abstract =     "Despite substantial interest in applications of neural
                 networks to information retrieval, neural ranking
                 models have mostly been applied to conventional ad-hoc
                 retrieval tasks over web pages and newswire articles.
                 This article proposes a concept-enhanced \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wei:2023:DSB,
  author =       "Xuemei Wei and Yezheng Liu and Jianshan Sun and
                 Yuanchun Jiang and Qifeng Tang and Kun Yuan",
  title =        "Dual Subgraph-Based Graph Neural Network for
                 Friendship Prediction in Location-Based Social
                 Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "42:1--42:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3554981",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3554981",
  abstract =     "With the wide use of Location-Based Social Networks
                 (LBSNs), predicting user friendship from online social
                 relations and offline trajectory data is of great value
                 to improve the platform service quality and user
                 satisfaction. Existing methods mainly focus \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2023:DTL,
  author =       "Xin Jiang and Zhengxin Yu and Chao Hai and Hongbo Liu
                 and Xindong Wu and Tomas Ward",
  title =        "{DNformer}: Temporal Link Prediction with Transfer
                 Learning in Dynamic Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "43:1--43:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551892",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551892",
  abstract =     "Temporal link prediction (TLP) is among the most
                 important graph learning tasks, capable of predicting
                 dynamic, time-varying links within networks. The key
                 problem of TLP is how to explore potential
                 link-evolving tendency from the increasing number of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Aleryani:2023:MIE,
  author =       "Aliya Aleryani and Aaron Bostrom and Wenjia Wang and
                 Beatriz Iglesia",
  title =        "Multiple Imputation Ensembles for Time Series
                 ({MIE-TS})",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "44:1--44:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551643",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551643",
  abstract =     "Time series classification has become an interesting
                 field of research, thanks to the extensive studies
                 conducted in the past two decades. Time series may have
                 missing data, which may affect both the representation
                 and also modeling of time series. Thus, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2023:VGA,
  author =       "Dongjie Li and Dong Li and Guang Lian",
  title =        "Variational Graph Autoencoder with Adversarial Mutual
                 Information Learning for Network Representation
                 Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "45:1--45:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3555809",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555809",
  abstract =     "With the success of Graph Neural Network (GNN) in
                 network data, some GNN-based representation learning
                 methods for networks have emerged recently. Variational
                 Graph Autoencoder (VGAE) is a basic GNN framework for
                 network representation. Its purpose is to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2023:CTIa,
  author =       "Gongqing Wu and Liangzhu Zhou and Jiazhu Xia and Lei
                 Li and Xianyu Bao and Xindong Wu",
  title =        "Crowdsourcing Truth Inference Based on Label
                 Confidence Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3556545",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3556545",
  abstract =     "Truth inference can help solve some difficult problems
                 of data integration in crowdsourcing. Crowdsourced
                 workers are not experts and their labeling ability
                 varies greatly; therefore, in practical applications,
                 it is difficult to determine whether the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Sehnan:2023:DSI,
  author =       "Dhruv Sehnan and Vasu Goel and Sarah Masud and Chhavi
                 Jain and Vikram Goyal and Tanmoy Chakraborty",
  title =        "{DiVA}: a Scalable, Interactive and Customizable
                 Visual Analytics Platform for Information Diffusion on
                 Large Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "47:1--47:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3558771",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558771",
  abstract =     "With an increasing outreach of digital platforms in
                 our lives, researchers have taken a keen interest in
                 studying different facets of social interactions.
                 Analyzing the spread of information (aka diffusion)
                 has brought forth multiple research areas such
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{E:2023:CEC,
  author =       "Jinlong E and Mo Li and Jianqiang Huang",
  title =        "{CrowdAtlas}: Estimating Crowd Distribution within the
                 Urban Rail Transit System",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "48:1--48:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3558521",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558521",
  abstract =     "While urban rail transit systems are playing an
                 increasingly important role in meeting the
                 transportation demands of people, precise awareness of
                 how the human crowd is distributed within such a system
                 is highly necessary, which serves a range of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "48",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2023:PFL,
  author =       "Lei Yang and Jiaming Huang and Wanyu Lin and Jiannong
                 Cao",
  title =        "Personalized Federated Learning on {Non-IID} Data via
                 Group-based Meta-learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "49:1--49:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3558005",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558005",
  abstract =     "Personalized federated learning (PFL) has emerged as a
                 paradigm to provide a personalized model that can fit
                 the local data distribution of each client. One natural
                 choice for PFL is to leverage the fast adaptation
                 capability of meta-learning, where it \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "49",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Hermanns:2023:GSG,
  author =       "Judith Hermanns and Konstantinos Skitsas and Anton
                 Tsitsulin and Marina Munkhoeva and Alexander Kyster and
                 Simon Nielsen and Alexander M. Bronstein and Davide
                 Mottin and Panagiotis Karras",
  title =        "{GRASP}: Scalable Graph Alignment by Spectral
                 Corresponding Functions",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "50:1--50:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561058",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561058",
  abstract =     "What is the best way to match the nodes of two graphs?
                 This graph alignment problem generalizes graph
                 isomorphism and arises in applications from social
                 network analysis to bioinformatics. Some solutions
                 assume that auxiliary information on known matches
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "50",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Nakajima:2023:RWS,
  author =       "Kazuki Nakajima and Kazuyuki Shudo",
  title =        "Random Walk Sampling in Social Networks Involving
                 Private Nodes",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "51:1--51:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561388",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561388",
  abstract =     "Analysis of social networks with limited data access
                 is challenging for third parties. To address this
                 challenge, a number of studies have developed
                 algorithms that estimate properties of social networks
                 via a simple random walk. However, most existing
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "51",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Shui:2023:LOL,
author = "Changjian Shui and William Wang and Ihsen Hedhli and
Chi Man Wong and Feng Wan and Boyu Wang and Christian
Gagn{\'e}",
title = "Lifelong Online Learning from Accumulated Knowledge",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "52:1--52:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3563947",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3563947",
abstract = "In this article, we formulate lifelong learning as an
online transfer learning procedure over consecutive
tasks, where learning a given task depends on the
accumulated knowledge. We propose a novel theoretical
principled framework, lifelong online learning \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "52",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Dai:2023:DMV,
author = "Shaojie Dai and Jinshuai Wang and Chao Huang and
Yanwei Yu and Junyu Dong",
title = "Dynamic Multi-View Graph Neural Networks for Citywide
Traffic Inference",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "53:1--53:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3564754",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3564754",
abstract = "Accurate citywide traffic inference is critical for
improving intelligent transportation systems with smart
city applications. However, this task is very
challenging given the limited training data, due to the
high cost of sensor installment and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "53",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ling:2023:STD,
author = "Shuai Ling and Zhe Yu and Shaosheng Cao and Haipeng
Zhang and Simon Hu",
title = "{STHAN}: Transportation Demand Forecasting with
Compound Spatio-Temporal Relationships",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "54:1--54:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3565578",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3565578",
abstract = "Transportation demand forecasting is a critical
precondition of optimal online transportation dispatch,
which will greatly reduce drivers' wasted mileage and
customers' waiting time, contributing to economic and
environmental sustainability. Though \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "54",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2023:MMI,
author = "Jiaying Liu and Feng Xia and Jing Ren and Bo Xu and
Guansong Pang and Lianhua Chi",
title = "{MIRROR}: Mining Implicit Relationships via
Structure-Enhanced Graph Convolutional Networks",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "55:1--55:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3564531",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3564531",
abstract = "Data explosion in the information society drives
people to develop more effective ways to extract
meaningful information. Extracting semantic information
and relational information has emerged as a key mining
primitive in a wide variety of practical \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "55",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2023:TTA,
author = "Zhi Liu and Yang Chen and Feng Xia and Jixin Bian and
Bing Zhu and Guojiang Shen and Xiangjie Kong",
title = "{TAP}: Traffic Accident Profiling via Multi-Task
Spatio-Temporal Graph Representation Learning",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "56:1--56:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3564594",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3564594",
abstract = "Predicting traffic accidents can help traffic
management departments respond to sudden traffic
situations promptly, improve drivers' vigilance, and
reduce losses caused by traffic accidents. However, the
causality of traffic accidents is complex and
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "56",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2023:TRR,
author = "Lei Chen and Jie Cao and Haicheng Tao and Jia Wu",
title = "Trip Reinforcement Recommendation with Graph-based
Representation Learning",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "57:1--57:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3564609",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3564609",
abstract = "Tourism is an important industry and a popular leisure
activity involving billions of tourists per annum. One
challenging problem tourists face is identifying
attractive Places-of-Interest (POIs) and planning the
personalized trip with time constraints. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "57",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2023:LEL,
author = "Huiming Chen and Huandong Wang and Quanming Yao and
Yong Li and Depeng Jin and Qiang Yang",
title = "{LoSAC}: an Efficient Local Stochastic Average Control
Method for Federated Optimization",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "58:1--58:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3566128",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3566128",
abstract = "Federated optimization (FedOpt), which targets at
collaboratively training a learning model across a
large number of distributed clients, is vital for
federated learning. The primary concerns in FedOpt can
be attributed to the model divergence and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "58",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jing:2023:LSR,
author = "Mengyuan Jing and Yanmin Zhu and Yanan Xu and Haobing
Liu and Tianzi Zang and Chunyang Wang and Jiadi Yu",
title = "Learning Shared Representations for Recommendation
with Dynamic Heterogeneous Graph Convolutional
Networks",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "59:1--59:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3565575",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3565575",
abstract = "Graph Convolutional Networks (GCNs) have been widely
used for collaborative filtering, due to their
effectiveness in exploiting high-order collaborative
signals. However, two issues have not been well
addressed by existing studies. First, usually only one
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "59",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:SSD,
author = "Yizong Wang and Dong Zhao and Yajie Ren and Desheng
Zhang and Huadong Ma",
title = "{SPAP}: Simultaneous Demand Prediction and Planning
for Electric Vehicle Chargers in a New City",
journal = j-TKDD,
volume = "17",
number = "4",
pages = "60:1--60:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3565577",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:29:25 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3565577",
abstract = "For a new city that is committed to promoting Electric
Vehicles (EVs), it is significant to plan the public
charging infrastructure where charging demands are
high. However, it is difficult to predict charging
demands before the actual deployment of EV \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "60",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2023:ESM,
author = "Dandan Lin and Victor Junqiu Wei and Raymond Chi-Wing
Wong",
title = "Effective and Scalable Manifold Ranking-Based Image
Retrieval with Output Bound",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "61:1--61:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3565574",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3565574",
abstract = "Image retrieval keeps attracting a lot of attention
from both academic and industry over past years due to
its variety of useful applications. Due to the rapid
growth of deep learning approaches, more better feature
vectors of images could be discovered \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "61",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhou:2023:SPA,
author = "Peng Zhou and Xinwang Liu and Liang Du and Xuejun Li",
title = "Self-paced Adaptive Bipartite Graph Learning for
Consensus Clustering",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "62:1--62:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3564701",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3564701",
abstract = "Consensus clustering provides an elegant framework to
aggregate multiple weak clustering results to learn a
consensus one that is more robust and stable than a
single result. However, most of the existing methods
usually use all data for consensus \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "62",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2023:HTN,
author = "Mengran Li and Yong Zhang and Xiaoyong Li and Yuchen
Zhang and Baocai Yin",
title = "Hypergraph Transformer Neural Networks",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "63:1--63:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3565028",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3565028",
abstract = "Graph neural networks (GNNs) have been widely used for
graph structure learning and achieved excellent
performance in tasks such as node classification and
link prediction. Real-world graph networks imply
complex and various semantic information and are
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "63",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2023:TFF,
author = "Haoran Li and Zhiqiang Lv and Jianbo Li and Zhihao Xu
and Yue Wang and Haokai Sun and Zhaoyu Sheng",
title = "Traffic Flow Forecasting in the {COVID-19}: a Deep
Spatial-temporal Model Based on Discrete Wavelet
Transformation",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "64:1--64:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3564753",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3564753",
abstract = "Traffic flow prediction has always been the focus of
research in the field of Intelligent Transportation
Systems, which is conducive to the more reasonable
allocation of basic transportation resources and
formulation of transportation policies. The spread
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "64",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2023:CTIb,
author = "Gongqing Wu and Xingrui Zhuo and Xianyu Bao and
Xuegang Hu and Richang Hong and Xindong Wu",
title = "Crowdsourcing Truth Inference via Reliability-Driven
Multi-View Graph Embedding",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "65:1--65:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3565576",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3565576",
abstract = "Crowdsourcing truth inference aims to assign a correct
answer to each task from candidate answers that are
provided by crowdsourced workers. A common approach is
to generate workers' reliabilities to represent the
quality of answers. Although crowdsourced \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "65",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jang:2023:SST,
author = "Jun-Gi Jang and U. Kang",
title = "Static and Streaming {Tucker} Decomposition for Dense
Tensors",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "66:1--66:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3568682",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3568682",
abstract = "Given a dense tensor, how can we efficiently discover
hidden relations and patterns in static and online
streaming settings? Tucker decomposition is a
fundamental tool to analyze multidimensional arrays in
the form of tensors. However, existing Tucker
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "66",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:ULH,
author = "Meng Wang and Boyu Li and Kun He and John Hopcroft",
title = "Uncovering the Local Hidden Community Structure in
Social Networks",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "67:1--67:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3567597",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3567597",
abstract = "Hidden community is a useful concept proposed recently
for social network analysis. Hidden communities
indicate some weak communities whose most members also
belong to other stronger dominant communities. Dominant
communities could form a layer that \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "67",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2023:CFU,
author = "Hao Liu and Qingyu Guo and Hengshu Zhu and Yanjie Fu
and Fuzhen Zhuang and Xiaojuan Ma and Hui Xiong",
title = "Characterizing and Forecasting Urban Vibrancy
Evolution: a Multi-View Graph Mining Perspective",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "68:1--68:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3568683",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3568683",
abstract = "Urban vibrancy describes the prosperity, diversity,
and accessibility of urban areas, which is vital to a
city's socio-economic development and sustainability.
While many efforts have been made for statically
measuring and evaluating urban vibrancy, there
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "68",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ren:2023:AMA,
author = "Yuyang Ren and Haonan Zhang and Peng Yu and Luoyi Fu
and Xinde Cao and Xinbing Wang and Guihai Chen and Fei
Long and Chenghu Zhou",
title = "{Ada-MIP}: Adaptive Self-supervised Graph
Representation Learning via Mutual Information and
Proximity Optimization",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "69:1--69:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3568165",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3568165",
abstract = "Self-supervised graph-level representation learning
has recently received considerable attention. Given
varied input distributions, jointly learning graphs'
unique and common features is vital to downstream
tasks. Inspired by graph contrastive learning
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "69",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gu:2023:OJU,
author = "Zhibin Gu and Songhe Feng and Ruiting Hu and Gengyu
Lyu",
title = "{ONION}: Joint Unsupervised Feature Selection and
Robust Subspace Extraction for Graph-based Multi-View
Clustering",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "70:1--70:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3568684",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3568684",
abstract = "Graph-based Multi-View Clustering (GMVC) has received
extensive attention due to its ability to capture the
neighborhood relationship among data points from
diverse views. However, most existing approaches
construct similarity graphs from the original
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "70",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2023:SGU,
author = "Zhijie Zhang and Wenzhong Li and Wangxiang Ding and
Linming Zhang and Qingning Lu and Peng Hu and Tong Gui
and Sanglu Lu",
title = "{STAD-GAN}: Unsupervised Anomaly Detection on
Multivariate Time Series with Self-training Generative
Adversarial Networks",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "71:1--71:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3572780",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3572780",
abstract = "Anomaly detection on multivariate time series (MTS) is
an important research topic in data mining, which has a
wide range of applications in information technology,
financial management, manufacturing system, and so on.
However, the state-of-the-art \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "71",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2023:WEC,
author = "Hongxin Wu and Meng Han and Zhiqiang Chen and Muhang
Li and Xilong Zhang",
title = "A Weighted Ensemble Classification Algorithm Based on
Nearest Neighbors for Multi-Label Data Stream",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "72:1--72:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3570960",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3570960",
abstract = "With the rapid development of data stream, multi-label
algorithms for mining dynamic data become more and more
important. At the same time, when data distribution
changes, concept drift will occur, which will make the
existing classification models lose \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "72",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:ASA,
author = "Chunnan Wang and Kaixin Zhang and Hongzhi Wang and
Bozhou Chen",
title = "{Auto-STGCN}: Autonomous Spatial-Temporal Graph
Convolutional Network Search",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "73:1--73:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3571285",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3571285",
abstract = "In recent years, many spatial-temporal graph
convolutional network (STGCN) models are proposed to
deal with the spatial-temporal network data forecasting
problem. These STGCN models have their own advantages,
i.e., each of them puts forward many effective
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "73",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2023:SSS,
author = "Yufu Chen and Yanghui Rao and Shurui Chen and Zhiqi
Lei and Haoran Xie and Raymond Y. K. Lau and Jian Yin",
title = "Semi-Supervised Sentiment Classification and Emotion
Distribution Learning Across Domains",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "74:1--74:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3571736",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3571736",
abstract = "In this study, sentiment classification and emotion
distribution learning across domains are both
formulated as a semi-supervised domain adaptation
problem, which utilizes a small amount of labeled
documents in the target domain for model training. By
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "74",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tang:2023:DSB,
author = "Hui Tang and Xun Liang and Yuhui Guo and Xiangping
Zheng and Bo Wu and Sensen Zhang and Zhiying Li",
title = "Diffuse and Smooth: Beyond Truncated Receptive Field
for Scalable and Adaptive Graph Representation
Learning",
journal = j-TKDD,
volume = "17",
number = "5",
pages = "75:1--75:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3572781",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Apr 8 07:47:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3572781",
abstract = "As the scope of receptive field and the depth of Graph
Neural Networks (GNNs) are two completely orthogonal
aspects for graph learning, existing GNNs often have
shallow layers with truncated-receptive field and far
from achieving satisfactory performance. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "75",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2023:PPP,
author = "Xiao Liu and Bonan Gao and Basem Suleiman and Han You
and Zisu Ma and Yu Liu and Ali Anaissi",
title = "Privacy-Preserving Personalized Fitness Recommender
System {P$^3$FitRec}: a Multi-level Deep Learning
Approach",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "76:1--76:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3572899",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3572899",
abstract = "Recommender systems have been successfully used in
many domains with the help of machine learning
algorithms. However, such applications tend to use
multi-dimensional user data, which has raised
widespread concerns about the breach of users' privacy.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "76",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2023:TCS,
author = "Jie Yang and Zhixiao Wang and Xiaobin Rui and Yahui
Chai and Philip S. Yu and Lichao Sun",
title = "Triadic Closure Sensitive Influence Maximization",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "77:1--77:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3573011",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3573011",
abstract = "The influence maximization problem aims at selecting
the k most influential nodes (i.e., seed nodes) from a
social network, where the nodes can maximize the number
of influenced nodes \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "77",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cousins:2023:BBC,
author = "Cyrus Cousins and Chloe Wohlgemuth and Matteo
Riondato",
title = "{Bavarian}: Betweenness Centrality Approximation with
Variance-aware {Rademacher} Averages",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "78:1--78:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3577021",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3577021",
abstract = "``[A]llain Gersten, Hopfen, und Wasser'' --- 1516
Reinheitsgebot. We present Bavarian, a collection of
sampling-based algorithms for approximating the
Betweenness Centrality (BC) of all vertices in a graph.
Our algorithms use Monte-Carlo Empirical Rademacher
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "78",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jalali:2023:FIF,
author = "Zeinab S. Jalali and Qilan Chen and Shwetha M.
Srikanta and Weixiang Wang and Myunghwan Kim and Hema
Raghavan and Sucheta Soundarajan",
title = "Fairness of Information Flow in Social Networks",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "79:1--79:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3578268",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3578268",
abstract = "Social networks form a major parts of people's lives,
and individuals often make important life decisions
based on information that spreads through these
networks. For this reason, it is important to know
whether individuals from different protected \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "79",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2023:EEG,
author = "Yunyi Li and Yongjing Hao and Pengpeng Zhao and
Guanfeng Liu and Yanchi Liu and Victor S. Sheng and
Xiaofang Zhou",
title = "Edge-enhanced Global Disentangled Graph Neural Network
for Sequential Recommendation",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "80:1--80:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3577928",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3577928",
abstract = "Sequential recommendation has been a widely popular
topic of recommender systems. Existing works have
contributed to enhancing the prediction ability of
sequential recommendation systems based on various
methods, such as recurrent networks and self-attention
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "80",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Feng:2023:HDP,
author = "Wenjie Feng and Shenghua Liu and Xueqi Cheng",
title = "Hierarchical Dense Pattern Detection in Tensors",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "81:1--81:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3577022",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3577022",
abstract = "Dense subtensor detection gains remarkable success in
spotting anomalies and fraudulent behaviors for
multi-aspect data (i.e., tensors), like in social media
and event streams. Existing methods detect the densest
subtensors flatly and separately, with the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "81",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2023:GCD,
  author          = {Jingmin Huang and Bowei Chen and Zhi Yan and Iadh
                     Ounis and Jun Wang},
  title           = {{GEO}: a Computational Design Framework for Automotive
                     Exterior Facelift},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {82:1--82:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3578521},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3578521},
  abstract        = {Exterior facelift has become an effective method for
                     automakers to boost the consumers' interest in an
                     existing car model before it is redesigned. To support
                     the automotive facelift design process, this study
                     develops a novel computational framework --- \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {82},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Singh:2023:MSM,
  author          = {Karandeep Singh and Seungeon Lee and Giuseppe (Joe)
                     Labianca and Jesse Michael Fagan and Meeyoung Cha},
  title           = {Multi-Stage Machine Learning Model for Hierarchical
                     Tie Valence Prediction},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {83:1--83:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3579096},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3579096},
  abstract        = {Individuals interacting in organizational settings
                     involving varying levels of formal hierarchy naturally
                     form a complex network of social ties having different
                     tie valences (e.g., positive and negative connections).
                     Social ties critically affect \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {83},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Su:2023:NMF,
  author          = {Sixing Su and Jiewen Guan and Bilian Chen and Xin
                     Huang},
  title           = {Nonnegative Matrix Factorization Based on Node
                     Centrality for Community Detection},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {84:1--84:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3578520},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3578520},
  abstract        = {Community detection is an important topic in network
                     analysis, and recently many community detection methods
                     have been developed on top of the Nonnegative Matrix
                     Factorization (NMF) technique. Most NMF-based community
                     detection methods only utilize the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {84},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2023:EEA,
  author          = {Yuxiang Wang and Jun Liu and Xiaoliang Xu and Xiangyu
                     Ke and Tianxing Wu and Xiaoxuan Gou},
  title           = {Efficient and Effective Academic Expert Finding on
                     Heterogeneous Graphs through {$ (k, P) $}-Core based
                     Embedding},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {85:1--85:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3578365},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3578365},
  abstract        = {Expert finding is crucial for a wealth of applications
                     in both academia and industry. Given a user query and
                     trove of academic papers, expert finding aims at
                     retrieving the most relevant experts for the query,
                     from the academic papers. Existing studies \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {85},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2023:SUI,
  author          = {Yongjie Wang and Ke Wang and Cheng Long and Chunyan
                     Miao},
  title           = {Summarizing User-item Matrix By Group Utility
                     Maximization},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {86:1--86:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3578586},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3578586},
  abstract        = {A user-item utility matrix represents the utility (or
                     preference) associated with each (user, item) pair,
                     such as citation counts, rating/vote on items or
                     locations, and clicks on items. A high utility value
                     indicates a strong association of the pair. In
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {86},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Ni:2023:MBP,
  author          = {Peikun Ni and Jianming Zhu and Guoqing Wang},
  title           = {Misinformation Blocking Problem in Virtual and Real
                     Interconversion Social Networks},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {87:1--87:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3578936},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3578936},
  abstract        = {With the in-depth development of intelligent media
                     technology, online and offline fusion, reality and
                     virtual entanglement, information content
                     generalization, the boundary between positive and
                     negative information is blurred, all kinds of
                     misinformation \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {87},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Ren:2023:SFB,
  author          = {Jinjun Ren and Yuping Wang and Xiyan Deng},
  title           = {Slack-Factor-Based Fuzzy Support Vector Machine for
                     Class Imbalance Problems},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {88:1--88:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3579050},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3579050},
  abstract        = {Class imbalance and noisy data widely exist in
                     real-world problems, and the support vector machine
                     (SVM) is hard to construct good classifiers on these
                     data. Fuzzy SVMs (FSVMs), as variants of SVM, use a
                     fuzzy membership function both to reflect the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {88},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Li:2023:TWP,
  author          = {Lei Li and Zhiyuan Liu and Zan Zhang and Huanhuan Chen
                     and Xindong Wu},
  title           = {Three-way Preference Completion via Preference Graph},
  journal         = j-TKDD,
  volume          = {17},
  number          = {6},
  pages           = {89:1--89:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3580368},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Apr 17 11:51:51 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3580368},
  abstract        = {With the personal partial rankings from agents over a
                     subset of alternatives, the goal of preference
                     completion is to infer the agent's personalized
                     preference over all alternatives including those the
                     agent has not yet handled from uncertain preference
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {89},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Khokhar:2023:DPR,
author = "Rashid Hussain Khokhar and Benjamin C. M. Fung and
Farkhund Iqbal and Khalil Al-Hussaeni and Mohammed
Hussain",
title = "Differentially Private Release of Heterogeneous
Network for Managing Healthcare Data",
journal = j-TKDD,
volume = "17",
number = "6",
pages = "90:1--90:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3580367",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Apr 17 11:51:51 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3580367",
abstract = "With the increasing adoption of digital health
platforms through mobile apps and online services,
people have greater flexibility connecting with medical
practitioners, pharmacists, and laboratories and
accessing resources to manage their own health
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "90",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2023:RCM,
  author          = {Mimi Zhang and Andrew Parnell},
  title           = {Review of Clustering Methods for Functional Data},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {91:1--91:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3581789},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3581789},
  abstract        = {Functional data clustering is to identify
                     heterogeneous morphological patterns in the continuous
                     functions underlying the discrete
                     measurements/observations. Application of functional
                     data clustering has appeared in many publications
                     across various fields \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {91},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Chen:2023:MLV,
  author          = {Ling Chen and Dandan Lyu and Shanshan Yu and Gencai
                     Chen},
  title           = {Multi-Level Visual Similarity Based Personalized
                     Tourist Attraction Recommendation Using Geo-Tagged
                     Photos},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {92:1--92:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3582015},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3582015},
  abstract        = {Geo-tagged photo-based tourist attraction
                     recommendation can discover users' travel preferences
                     from their taken photos, so as to recommend suitable
                     tourist attractions to them. However, existing visual
                     content-based methods cannot fully exploit the user
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {92},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Xu:2023:OSF,
  author          = {Wanyue Xu and Zhongzhi Zhang},
  title           = {Optimal Scale-Free Small-World Graphs with Minimum
                     Scaling of Cover Time},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {93:1--93:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3583691},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3583691},
  abstract        = {The cover time of random walks on a graph has found
                     wide practical applications in different fields of
                     computer science, such as crawling and searching on the
                     World Wide Web and query processing in sensor networks,
                     with the application effects dependent \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {93},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Sheth:2023:CDI,
  author          = {Paras Sheth and Ruocheng Guo and Lu Cheng and Huan Liu
                     and Kasim Sel{\c{c}}uk Candan},
  title           = {Causal Disentanglement for Implicit Recommendations
                     with Network Information},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {94:1--94:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3582435},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3582435},
  abstract        = {Online user engagement is highly influenced by various
                     machine learning models, such as recommender systems.
                     These systems recommend new items to the user based on
                     the user's historical interactions. Implicit
                     recommender systems reflect a binary setting \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {94},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhang:2023:ESM,
  author          = {Yihong Zhang and Xiu Susie Fang and Takahiro Hara},
  title           = {Evolving Social Media Background Representation with
                     Frequency Weights and Co-Occurrence Graphs},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {95:1--95:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3585389},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3585389},
  abstract        = {Social media as a background information source has
                     been utilized in many practical computational tasks,
                     such as stock price prediction, epidemic tracking, and
                     product recommendation. However, proper representation
                     of an evolving social media background \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {95},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Li:2023:NWV,
  author          = {Huiru Li and Liangxiao Jiang and Siqing Xue},
  title           = {Neighborhood Weighted Voting-Based Noise Correction
                     for Crowdsourcing},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {96:1--96:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3586998},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3586998},
  abstract        = {In crowdsourcing scenarios, we can obtain each
                     instance's multiple noisy labels set from different
                     crowd workers and then use a ground truth inference
                     algorithm to infer its integrated label. Despite the
                     effectiveness of ground truth inference algorithms,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {96},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Li:2023:DNE,
  author          = {He Li and Duo Jin and Xuejiao Li and Jianbin Huang and
                     Xiaoke Ma and Jiangtao Cui and Deshuang Huang and
                     Shaojie Qiao and Jaesoo Yoo},
  title           = {{DMGF-Net}: an Efficient Dynamic Multi-Graph Fusion
                     Network for Traffic Prediction},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {97:1--97:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3586164},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3586164},
  abstract        = {Traffic prediction is the core task of intelligent
                     transportation system (ITS) and accurate traffic
                     prediction can greatly improve the utilization of
                     public resources. Dynamic interaction of multiple
                     spatial relationships will influence the accuracy of
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {97},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhao:2023:CDC,
  author          = {Boxiang Zhao and Shuliang Wang and Lianhua Chi and Qi
                     Li and Xiaojia Liu and Jing Geng},
  title           = {Causal Discovery via Causal Star Graphs},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {98:1--98:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3586997},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3586997},
  abstract        = {Discovering causal relationships among observed
                     variables is an important research focus in data
                     mining. Existing causal discovery approaches are mainly
                     based on constraint-based methods and functional causal
                     models (FCMs). However, the constraint-based \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {98},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2023:GDL,
  author          = {Dexian Wang and Tianrui Li and Ping Deng and Fan Zhang
                     and Wei Huang and Pengfei Zhang and Jia Liu},
  title           = {A Generalized Deep Learning Clustering Algorithm Based
                     on Non-Negative Matrix Factorization},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {99:1--99:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3584862},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3584862},
  abstract        = {Clustering is a popular research topic in the field of
                     data mining, in which the clustering method based on
                     non-negative matrix factorization (NMF) has been widely
                     employed. However, in the update process of NMF, there
                     is no learning rate to guide the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {99},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Biswas:2023:RIM,
  author          = {Tarun Kumer Biswas and Alireza Abbasi and Ripon Kumar
                     Chakrabortty},
  title           = {Robust Influence Maximization Under Both Aleatory and
                     Epistemic Uncertainty},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {100:1--100:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587100},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587100},
  abstract        = {Uncertainty is ubiquitous in almost every real-life
                     optimization problem, which must be effectively managed
                     to get a robust outcome. This is also true for the
                     Influence Maximization (IM) problem, which entails
                     locating a set of influential users within a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {100},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Guo:2023:DAD,
  author          = {Yuhui Guo and Xun Liang and Bo Wu and Xiangping Zheng
                     and Xuan Zhang},
  title           = {Dual-aware Domain Mining and Cross-aware Supervision
                     for Weakly-supervised Semantic Segmentation},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {101:1--101:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589343},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589343},
  abstract        = {Weakly Supervised Semantic Segmentation with
                     image-level annotation uses localization maps from the
                     classifier to generate pseudo labels. However, such
                     localization maps focus only on sparse salient object
                     regions, it is difficult to generate high-quality
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {101},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Cheng:2023:FIU,
  author          = {Jiezhu Cheng and Kaizhu Huang and Zibin Zheng},
  title           = {Fitting Imbalanced Uncertainties in Multi-output Time
                     Series Forecasting},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {102:1--102:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3584704},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3584704},
  abstract        = {We focus on multi-step ahead time series forecasting
                     with the multi-output strategy. From the perspective of
                     multi-task learning (MTL), we recognize imbalanced
                     uncertainties between prediction tasks of different
                     future time steps. Unexpectedly, trained by \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {102},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhang:2023:MVE,
  author          = {Xuanqi Zhang and Qiangqiang Shen and Yongyong Chen and
                     Guokai Zhang and Zhongyun Hua and Jingyong Su},
  title           = {Multi-view Ensemble Clustering via Low-rank and Sparse
                     Decomposition: From Matrix to Tensor},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {103:1--103:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589768},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589768},
  abstract        = {As a significant extension of classical clustering
                     methods, ensemble clustering first generates multiple
                     basic clusterings and then fuses them into one
                     consensus partition by solving a problem concerning
                     graph partition with respect to the co-association
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {103},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhang:2023:DDQ,
  author          = {Sensen Zhang and Xun Liang and Hui Tang and Xiangping
                     Zheng and Alex X. Zhang and Yuefeng Ma},
  title           = {{DuCape}: Dual Quaternion and Capsule Network-Based
                     Temporal Knowledge Graph Embedding},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {104:1--104:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589644},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589644},
  abstract        = {Recently, with the development of temporal knowledge
                     graph technology, more and more Temporal Knowledge
                     Graph Embedded (TKGE) models have been developed. The
                     effectiveness of TKGE largely depends on the ability to
                     model intrinsic relation patterns and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {104},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Wang:2023:TID,
  author          = {Hao Wang and Bin Guo and Jiaqi Liu and Yasan Ding and
                     Zhiwen Yu},
  title           = {Towards Informative and Diverse Dialogue Systems Over
                     Hierarchical Crowd Intelligence Knowledge Graph},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {105:1--105:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3583758},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3583758},
  abstract        = {Knowledge-enhanced dialogue systems aim at generating
                     factually correct and coherent responses by reasoning
                     over knowledge sources, which is a promising research
                     trend. The truly harmonious human-agent dialogue
                     systems need to conduct engaging \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {105},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Ragab:2023:ABS,
  author          = {Mohamed Ragab and Emadeldeen Eldele and Wee Ling Tan
                     and Chuan-Sheng Foo and Zhenghua Chen and Min Wu and
                     Chee-Keong Kwoh and Xiaoli Li},
  title           = {{ADATIME}: a Benchmarking Suite for Domain Adaptation
                     on Time Series Data},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {106:1--106:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587937},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587937},
  abstract        = {Unsupervised domain adaptation methods aim at
                     generalizing well on unlabeled test data that may have
                     a different (shifted) distribution from the training
                     data. Such methods are typically developed on image
                     data, and their application to time series data
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {106},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Halstead:2023:CDM,
  author          = {Ben Halstead and Yun Sing Koh and Patricia Riddle and
                     Mykola Pechenizkiy and Albert Bifet},
  title           = {Combining Diverse Meta-Features to Accurately Identify
                     Recurring Concept Drift in Data Streams},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {107:1--107:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587098},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587098},
  abstract        = {Learning from streaming data is challenging as the
                     distribution of incoming data may change over time, a
                     phenomenon known as concept drift. The predictive
                     patterns, or experience learned under one distribution
                     may become irrelevant as conditions change \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {107},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Shi:2023:KFD,
author = "Linrui Shi and Zheng Zhang and Zizhu Fan and Chao Xi
and Zhengming Li and Gaochang Wu",
title = "Kernel {Fisher} Dictionary Transfer Learning",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "108:1--108:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3588575",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3588575",
abstract = "Dictionary learning is an efficient knowledge
representation method that can learn the essential
features of data. Traditional dictionary learning
methods are difficult to obtain nonlinear information
when processing large-scale and high-dimensional
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "108",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2023:GNN,
  author          = {Heli Sun and Miaomiao Sun and Xuechun Liu and Linlin
                     Zhu and Liang He and Xiaolin Jia and Yuan Chen},
  title           = {Graph Neural Networks with Motisf-aware for Tenuous
                     Subgraph Finding},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {109:1--109:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589643},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589643},
  abstract        = {Tenuous subgraph finding aims to detect a subgraph
                     with few social interactions and weak relationships
                     among nodes. Despite significant efforts made on this
                     task, they are mostly carried out in view of
                     graph-structured data. These methods depend on
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {109},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Wu:2023:LES,
  author          = {Likang Wu and Hongke Zhao and Zhi Li and Zhenya Huang
                     and Qi Liu and Enhong Chen},
  title           = {Learning the Explainable Semantic Relations via
                     Unified Graph Topic-Disentangled Neural Networks},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {110:1--110:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589964},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589964},
  abstract        = {Graph Neural Networks (GNNs) such as Graph
                     Convolutional Networks (GCNs) can effectively learn
                     node representations via aggregating neighbors based on
                     the relation graph. However, despite a few exceptions,
                     most of the previous work in this line does not
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {110},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Jia:2023:SSB,
  author          = {Bohan Jia and Jian Cao and Shiyou Qian and Nengjun Zhu
                     and Xin Dong and Liang Zhang and Lei Cheng and Linjian
                     Mo},
  title           = {{SMONE}: a Session-based Recommendation Model Based on
                     Neighbor Sessions with Similar Probabilistic
                     Intentions},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {111:1--111:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587099},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587099},
  abstract        = {A session-based recommendation system (SRS) tries to
                     predict the next possible choice of anonymous users. In
                     recent years, graph neural network (GNN) models have
                     been successfully applied to SRSs and have achieved
                     great success. Using GNN models in SRSs, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {111},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}
@Article{Hassan:2023:CGD,
author = "Zohair Raza Hassan and Sarwan Ali and Imdadullah Khan
and Mudassir Shabbir and Waseem Abbas",
title = "Computing Graph Descriptors on Edge Streams",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "112:1--112:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3591468",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3591468",
abstract = "Feature extraction is an essential task in graph
analytics. These feature vectors, called graph
descriptors, are used in downstream vector-space-based
graph analysis models. This idea has proved fruitful in
the past, with spectral-based graph descriptors
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "112",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Canfora:2023:NCT,
author = "Gerardo Canfora and Francesco Mercaldo and Antonella
Santone",
title = "A Novel Classification Technique based on Formal
Methods",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "113:1--113:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592796",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3592796",
abstract = "In last years, we are witnessing a growing interest in
the application of supervised machine learning
techniques in the most disparate fields. One winning
factor of machine learning is represented by its
ability to easily create models, as it does not
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "113",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lin:2023:MVG,
author = "Bei Lin and You Li and Ning Gui and Zhuopeng Xu and
Zhiwu Yu",
title = "Multi-view Graph Representation Learning Beyond
Homophily",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "114:1--114:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592858",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3592858",
abstract = "Unsupervised graph representation learning (GRL) aims
at distilling diverse graph information into
task-agnostic embeddings without label supervision. Due
to a lack of support from labels, recent representation
learning methods usually adopt self-supervised learning \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "114",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tajeuna:2023:MRS,
author = "Etienne Gael Tajeuna and Mohamed Bouguessa and
Shengrui Wang",
title = "Modeling Regime Shifts in Multiple Time Series",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "115:1--115:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592857",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3592857",
abstract = "We investigate the problem of discovering and modeling
regime shifts in an ecosystem comprising multiple time
series known as co-evolving time series. Regime shifts
refer to the changing behaviors exhibited by series at
different time intervals. Learning \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "115",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shi:2023:ACS,
author = "Dan Shi and Lei Zhu and Xiao Dong and Xuemeng Song and
Jingjing Li and Zhiyong Cheng",
title = "Adaptive Collaborative Soft Label Learning for
Unsupervised Multi-View Feature Selection",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "116:1--116:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3591467",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3591467",
abstract = "Unsupervised multi-view feature selection aims to
select informative features with multi-view features
and unsupervised learning. It is a challenging problem
due to the absence of explicit semantic supervision.
Recently, graph theory and hard pseudo-label \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "116",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2023:CIT,
author = "Hao Zhang and Yewei Xia and Kun Zhang and Shuigeng
Zhou and Jihong Guan",
title = "Conditional Independence Test Based on Residual
Similarity",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "117:1--117:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3593810",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3593810",
abstract = "Recently, many regression-based conditional
independence (CI) test methods have been proposed to
solve the problem of causal discovery. These methods
provide alternatives to test CI of $x$, $y$ given $Z$ by first
removing the information of the controlling set $Z$
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "117",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yuan:2023:IVD,
author = "Junkun Yuan and Xu Ma and Ruoxuan Xiong and Mingming
Gong and Xiangyu Liu and Fei Wu and Lanfen Lin and Kun
Kuang",
title = "Instrumental Variable-Driven Domain Generalization
with Unobserved Confounders",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "118:1--118:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3595380",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3595380",
abstract = "Domain generalization (DG) aims to learn from multiple
source domains a model that can generalize well on
unseen target domains. Existing DG methods mainly learn
the representations with invariant marginal
distribution of the input features, however, the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "118",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Qin:2023:CBI,
author = "Xi Qin and Cheng Zhong and Hai Xiang Lin",
title = "Community-Based Influence Maximization Using Network
Embedding in Dynamic Heterogeneous Social Networks",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "119:1--119:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3594544",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3594544",
abstract = "Influence maximization (IM) is a very important issue
in social network diffusion analysis. The topology of
real social network is large-scale, dynamic, and
heterogeneous. The heterogeneity, and continuous
expansion and evolution of social network pose a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "119",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhuang:2023:CLB,
author = "Jiabo Zhuang and Shunmei Meng and Jing Zhang and
Victor S. Sheng",
title = "Contrastive Learning Based Graph Convolution Network
for Social Recommendation",
journal = j-TKDD,
volume = "17",
number = "8",
pages = "120:1--120:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3587268",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jul 3 07:15:57 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3587268",
abstract = "Exploiting social networks is expected to enhance the
performance of recommender systems when interaction
information is sparse. Existing social recommendation
models focus on modeling multi-graph structures and
then aggregating the information from these \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "120",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2023:RNR,
author = "Liang Zhang and Cheng Long",
title = "Road Network Representation Learning: a Dual
Graph-based Approach",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "121:1--121:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592859",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3592859",
abstract = "Road network is a critical infrastructure powering
many applications including transportation, mobility
and logistics in real life. To leverage the input of a
road network across these different applications, it is
necessary to learn the representations \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "121",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Syed:2023:SST,
author = "Tahir Syed and Behroz Mirza",
title = "Self-supervision for Tabular Data by Learning to
Predict Additive Homoskedastic {Gaussian} Noise as
Pretext",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "122:1--122:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3594720",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3594720",
abstract = "The lack of scalability of data annotation translates
to the need to decrease dependency on labels.
Self-supervision offers a solution with data training
themselves. However, it has received relatively less
attention on tabular data, data that drive a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "122",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2023:HCT,
author = "Xiaona Li and Zhu Wang and Xindong Chen and Bin Guo
and Zhiwen Yu",
title = "A Hybrid Continuous-Time Dynamic Graph Representation
Learning Model by Exploring Both Temporal and
Repetitive Information",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "123:1--123:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3596447",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3596447",
abstract = "Recently, dynamic graph representation learning has
attracted more and more attention from both academic
and industrial communities due to its capabilities of
capturing different real-world phenomena. For a dynamic
graph represented as a sequence of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "123",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ou:2023:STS,
author = "Junjie Ou and Haiming Jin and Xiaocheng Wang and Hao
Jiang and Xinbing Wang and Chenghu Zhou",
title = "{STA-TCN}: Spatial-temporal Attention over Temporal
Convolutional Network for Next Point-of-interest
Recommendation",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "124:1--124:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3596497",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3596497",
abstract = "Recent years have witnessed a vastly increasing
popularity of location-based social networks (LBSNs),
which facilitates studies on the next Point-of-Interest
(POI) recommendation problem. A user's POI visiting
behavior shows the sequential transition. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "124",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2023:MLS,
author = "Shaowei Jiang and Wei He and Lizhen Cui and Yonghui Xu
and Lei Liu",
title = "Modeling Long- and Short-Term User Preferences via
Self-Supervised Learning for Next {POI}
Recommendation",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "125:1--125:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597211",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3597211",
abstract = "With the accumulation of check-in data from
location-based services, next Point-of-Interest (POI)
recommendations are gaining increasing attention. It is
well known that the spatio-temporal contextual
information of user check-in behavior plays a crucial
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "125",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jang:2023:AOS,
author = "Jun-Gi Jang and Sooyeon Shim and Vladimir Egay and
Jeeyong Lee and Jongmin Park and Suhyun Chae and U.
Kang",
title = "Accurate Open-Set Recognition for Memory Workload",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "126:1--126:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597027",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3597027",
abstract = "How can we accurately identify new memory workloads
while classifying known memory workloads? Verifying
DRAM (Dynamic Random Access Memory) using various
workloads is an important task to guarantee the quality
of DRAM. A crucial component in the process \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "126",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Qin:2023:TBT,
author = "Meng Qin and Chaorui Zhang and Bo Bai and Gong Zhang
and Dit-Yan Yeung",
title = "Towards a Better Tradeoff between Quality and
Efficiency of Community Detection: an Inductive
Embedding Method across Graphs",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "127:1--127:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3596605",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3596605",
abstract = "Many network applications can be formulated as NP-hard
combinatorial optimization problems of community
detection (CD) that partitions nodes of a graph into
several groups with dense linkage. Most existing CD
methods are transductive, which independently
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "127",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2023:LRR,
author = "Haoran Chen and Xu Chen and Hongwei Tao and Zuhe Li
and Xiao Wang",
title = "Low-rank Representation with Adaptive Dimensionality
Reduction via Manifold Optimization for Clustering",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "128:1--128:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3589767",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3589767",
abstract = "The dimensionality reduction techniques are often used
to reduce data dimensionality for computational
efficiency or other purposes in existing low-rank
representation (LRR)-based methods. However, the two
steps of dimensionality reduction and learning
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "128",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2023:TTP,
author = "Ye Liu and Han Wu and Zhenya Huang and Hao Wang and
Yuting Ning and Jianhui Ma and Qi Liu and Enhong Chen",
title = "{TechPat}: Technical Phrase Extraction for Patent
Mining",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "129:1--129:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3596603",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3596603",
abstract = "In recent years, due to the explosive growth of patent
applications, patent mining has drawn extensive
attention and interest. An important issue of patent
mining is that of recognizing the technologies
contained in patents, which serves as a fundamental
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "129",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:MRA,
author = "Chunyang Wang and Yanmin Zhu and Haobing Liu and
Tianzi Zang and Ke Wang and Jiadi Yu",
title = "Multifaceted Relation-aware Meta-learning with Dual
Customization for User Cold-start Recommendation",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "130:1--130:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597458",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3597458",
abstract = "User cold-start scenarios pose great challenges to
recommendation systems in accurately capturing user
preferences with sparse interaction records. Besides
incorporating auxiliary information to enrich user/item
representations, recent studies under the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "130",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2023:ITB,
author = "Zhiwen Yu and Minling Dang and Qilong Wu and Liming
Chen and Yujin Xie and Yu Wang and Bin Guo",
title = "An Information Theory Based Method for Quantifying the
Predictability of Human Mobility",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "131:1--131:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597500",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3597500",
abstract = "Research on human mobility drives the development of
economy and society. How to predict when and where one
will go accurately is one of the core research
questions. Existing work is mainly concerned with
performance of mobility prediction models. Since
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "131",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ramezani:2023:JID,
author = "Maryam Ramezani and Aryan Ahadinia and Amirmohammad
Ziaei Bideh and Hamid R. Rabiee",
title = "Joint Inference of Diffusion and Structure in
Partially Observed Social Networks Using Coupled Matrix
Factorization",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "132:1--132:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3599237",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3599237",
abstract = "Access to complete data in large-scale networks is
often infeasible. Therefore, the problem of missing
data is a crucial and unavoidable issue in the analysis
and modeling of real-world social networks. However,
most of the research on different aspects \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "132",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2023:SIM,
author = "Yandi Li and Haobo Gao and Yunxuan Gao and Jianxiong
Guo and Weili Wu",
title = "A Survey on Influence Maximization: From an {ML}-Based
Combinatorial Optimization",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "133:1--133:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604559",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3604559",
abstract = "Influence Maximization (IM) is a classical
combinatorial optimization problem, which can be widely
used in mobile networks, social computing, and
recommendation systems. It aims at selecting a small
number of users such that maximizing the influence
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "133",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2023:MLF,
author = "Zan Zhang and Zhe Zhang and Jialu Yao and Lin Liu and
Jiuyong Li and Gongqing Wu and Xindong Wu",
title = "Multi-Label Feature Selection Via Adaptive Label
Correlation Estimation",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "134:1--134:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604560",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3604560",
abstract = "In multi-label learning, each instance is associated
with multiple labels simultaneously. Multi-label data
often have noisy, irrelevant, and redundant features of
high dimensionality. Multi-label feature selection has
received considerable attention as an \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "134",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ren:2023:CCG,
author = "Siyuan Ren and Bin Guo and Ke Li and Qianru Wang and
Qinfen Wang and Zhiwen Yu",
title = "{CoupledGT}: Coupled Geospatial-temporal Data Modeling
for Air Quality Prediction",
journal = j-TKDD,
volume = "17",
number = "9",
pages = "135:1--135:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604616",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Aug 19 07:15:21 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3604616",
abstract = "Air pollution seriously affects public health, while
effective air quality prediction remains a challenging
problem since the complex spatial-temporal couplings
exist in multi-area monitoring data of the city.
Current approaches rarely consider relative \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "135",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:HHG,
author = "Youru Li and Zhenfeng Zhu and Xiaobo Guo and Shaoshuai
Li and Yuchen Yang and Yao Zhao",
title = "{HGV4Risk}: Hierarchical Global View-guided Sequence
Representation Learning for Risk Prediction",
journal = j-TKDD,
volume = "18",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3605895",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:43 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3605895",
abstract = "Risk prediction, usually achieved by learning
representations from patient's physiological sequence
or user's behavioral sequence data, and has been widely
applied in healthcare and finance. Despite that, some
recent time-aware deep learning methods have \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "1",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Amagata:2024:EDP,
author = "Daichi Amagata and Takahiro Hara",
title = "Efficient Density-peaks Clustering Algorithms on
Static and Dynamic Data in {Euclidean} Space",
journal = j-TKDD,
volume = "18",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607873",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:43 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3607873",
abstract = "Clustering multi-dimensional points is a fundamental
task in many fields, and density-based clustering
supports many applications because it can discover
clusters of arbitrary shapes. This article addresses
the problem of Density-Peaks Clustering (DPC) in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "2",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Deng:2024:TNF,
author = "Jiewen Deng and Jinliang Deng and Du Yin and Renhe
Jiang and Xuan Song",
title = "{TTS-Norm}: Forecasting Tensor Time Series via
Multi-Way Normalization",
journal = j-TKDD,
volume = "18",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3605894",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:43 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3605894",
abstract = "Tensor time series (TTS) data, a generalization of
one-dimensional time series on a high-dimensional
space, is ubiquitous in real-world applications.
Compared to modeling time series or multivariate time
series, which has received much attention and
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "3",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Moreo:2024:MLQ,
author = "Alejandro Moreo and Manuel Francisco and Fabrizio
Sebastiani",
title = "Multi-Label Quantification",
journal = j-TKDD,
volume = "18",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3606264",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:43 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3606264",
abstract = "Quantification, variously called supervised prevalence
estimation or learning to quantify, is the supervised
learning task of generating predictors of the relative
frequencies (a.k.a.\ prevalence values) of the classes
of interest in unlabelled data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "4",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:HSF,
author = "Chunkai Zhang and Yuting Yang and Zilin Du and
Wensheng Gan and Philip S. Yu",
title = "{HUSP-SP}: Faster Utility Mining on Sequence Data",
journal = j-TKDD,
volume = "18",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597935",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:43 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3597935",
abstract = "High-utility sequential pattern mining (HUSPM) has
emerged as an important topic due to its wide
application and considerable popularity. However, due
to the combinatorial explosion of the search space when
the HUSPM problem encounters a low-utility \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "5",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2024:MVG,
author = "Zhaoliang Chen and Lele Fu and Shunxin Xiao and
Shiping Wang and Claudia Plant and Wenzhong Guo",
title = "Multi-View Graph Convolutional Networks with
Differentiable Node Selection",
journal = j-TKDD,
volume = "18",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608954",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:43 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3608954",
abstract = "Multi-view data containing complementary and consensus
information can facilitate representation learning by
exploiting the intact integration of multi-view
features. Because most objects in the real world often
have underlying connections, organizing \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "6",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Luo:2024:DLC,
  author =       "Fangyuan Luo and Jun Wu and Tao Wang",
  title =        "Discrete Listwise Content-aware Recommendation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609334",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609334",
  abstract =     "To perform online inference efficiently, hashing
                 techniques, devoted to encoding model parameters as
                 binary codes, play a key role in reducing the
                 computational cost of content-aware recommendation
                 (CAR), particularly on devices with limited computation
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:SGC,
  author =       "Huiyuan Li and Li Yu and Xi Niu and Youfang Leng and
                 Qihan Du",
  title =        "Sequential and Graphical Cross-Domain Recommendations
                 with a Multi-View Hierarchical Transfer Gate",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604615",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604615",
  abstract =     "Cross-domain recommender systems could potentially
                 improve the recommendation performance by means of
                 transferring abundant knowledge from the auxiliary
                 domain to the target domain. They could help address
                 some key challenges in recommender systems, such
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Mai:2024:SCC,
  author =       "Weiming Mai and Jiangchao Yao and Gong Chen and Ya
                 Zhang and Yiu-Ming Cheung and Bo Han",
  title =        "Server-Client Collaborative Distillation for Federated
                 Reinforcement Learning",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604939",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604939",
  abstract =     "Federated Learning (FL) learns a global model in a
                 distributional manner, which does not require local
                 clients to share private data. Such merit has drawn
                 lots of attention in the interaction scenarios, where
                 Federated Reinforcement Learning (FRL) emerges
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wu:2024:FRS,
  author =       "Yao Wu and Jian Cao and Guandong Xu",
  title =        "Fairness in Recommender Systems: Evaluation Approaches
                 and Assurance Strategies",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604558",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604558",
  abstract =     "With the wide application of recommender systems, the
                 potential impacts of recommender systems on customers,
                 item providers and other parties have attracted
                 increasing attention. Fairness, which is the quality of
                 treating people equally, is also becoming \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:TTD,
  author =       "Huan Wang and Guoquan Liu and Po Hu",
  title =        "{TDAN}: Transferable Domain Adversarial Network for
                 Link Prediction in Heterogeneous Social Networks",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3610229",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3610229",
  abstract =     "Link prediction has received increased attention in
                 social network analysis. One of the unique challenges
                 in heterogeneous social networks is link prediction in
                 new link types without verified link information, such
                 as recommending products to new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Corbara:2024:SDD,
  author =       "Silvia Corbara and Alejandro Moreo and Fabrizio
                 Sebastiani",
  title =        "Same or Different? {Diff}-Vectors for Authorship
                 Analysis",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609226",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609226",
  abstract =     "In this article, we investigate the effects on
                 authorship identification tasks (including authorship
                 verification, closed-set authorship attribution, and
                 closed-set and open-set same-author verification) of a
                 fundamental shift in how to conceive the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2024:RRH,
  author =       "Jincheng Huang and Ping Li and Rui Huang and Na Chen
                 and Acong Zhang",
  title =        "Revisiting the Role of Heterophily in Graph
                 Representation Learning: An Edge Classification
                 Perspective",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3603378",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3603378",
  abstract =     "Graph representation learning aims at integrating node
                 contents with graph structure to learn nodes/graph
                 representations. Nevertheless, it is found that many
                 existing graph learning methods do not work well on
                 data with high heterophily level that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Luo:2024:CBH,
  author =       "Xiao Luo and Daqing Wu and Yiyang Gu and Chong Chen
                 and Luchen Liu and Jinwen Ma and Ming Zhang and Minghua
                 Deng and Jianqiang Huang and Xian-Sheng Hua",
  title =        "Criterion-based Heterogeneous Collaborative Filtering
                 for Multi-behavior Implicit Recommendation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3611310",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3611310",
  abstract =     "Recent years have witnessed the explosive growth of
                 interaction behaviors in multimedia information
                 systems, where multi-behavior recommender systems have
                 received increasing attention by leveraging data from
                 various auxiliary behaviors such as tip and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2024:DDP,
  author =       "Huiting Liu and Yu Zhang and Peipei Li and Cheng Qian
                 and Peng Zhao and Xindong Wu",
  title =        "{DeepCPR}: Deep Path Reasoning Using Sequence of
                 User-Preferred Attributes for Conversational
                 Recommendation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3610775",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3610775",
  abstract =     "Conversational recommender systems (CRS) have garnered
                 significant attention in academia and industry because
                 of their ability to capture user preferences via system
                 questions and user responses. Typically, in a CRS,
                 reinforcement learning (RL) is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:DAD,
  author =       "Qiuyue Zhang and Yunfeng Zhang and Xunxiang Yao and
                 Shilong Li and Caiming Zhang and Peide Liu",
  title =        "A Dynamic Attributes-driven Graph Attention Network
                 Modeling on Behavioral Finance for Stock Prediction",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3611311",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3611311",
  abstract =     "Stock prediction is a challenging task due to multiple
                 influencing factors and complex market dependencies.
                 Traditional solutions are based on a single type of
                 information. With the success of multi-source
                 information in different fields, the combination
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chowdhury:2024:INC,
  author =       "Anjan Chowdhury and Sriram Srinivasan and Animesh
                 Mukherjee and Sanjukta Bhowmick and Kuntal Ghosh",
  title =        "Improving Node Classification Accuracy of {GNN}
                 through Input and Output Intervention",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3610535",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3610535",
  abstract =     "Graph Neural Networks (GNNs) are a popular machine
                 learning framework for solving various graph processing
                 applications. This framework exploits both the graph
                 topology and the feature vectors of the nodes. One of
                 the important applications of GNN is in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2024:SSD,
  author =       "Ke-Jia Chen and Linsong Liu and Linpu Jiang and
                 Jingqiang Chen",
  title =        "Self-Supervised Dynamic Graph Representation Learning
                 via Temporal Subgraph Contrast",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3612931",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3612931",
  abstract =     "Self-supervised learning on graphs has recently drawn
                 a lot of attention due to its independence from labels
                 and its robustness in representation. Current studies
                 on this topic mainly use static information such as
                 graph structures but cannot well capture \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2024:LBC,
  author =       "Yan Sun and Yi Han and Jicong Fan",
  title =        "{Laplacian}-based Cluster-Contractive $t$-{SNE} for
                 High-Dimensional Data Visualization",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "19:1--19:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3612932",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3612932",
  abstract =     "Dimensionality reduction techniques aim at
                 representing high-dimensional data in low-dimensional
                 spaces to extract hidden and useful information or
                 facilitate visual understanding and interpretation of
                 the data. However, few of them take into \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ge:2024:DCC,
  author =       "Yong-Feng Ge and Elisa Bertino and Hua Wang and Jinli
                 Cao and Yanchun Zhang",
  title =        "Distributed Cooperative Coevolution of Data Publishing
                 Privacy and Transparency",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "20:1--20:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3613962",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3613962",
  abstract =     "Data transparency is beneficial to data participants'
                 awareness, users' fairness, and research work's
                 reproducibility. However, when addressing transparency
                 requirements, we cannot ignore data privacy. This
                 article defines the multi-objective data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Strukova:2024:AKI,
  author =       "Sofia Strukova and Jos{\'e} A. Ruip{\'e}rez-Valiente
                 and G{\'o}mez M{\'a}rmol, F{\'e}lix",
  title =        "Adapting Knowledge Inference Algorithms to Measure
                 Geometry Competencies through a Puzzle Game",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "21:1--21:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3614436",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3614436",
  abstract =     "The rapid technological evolution of the last years
                 has motivated students to develop capabilities that
                 will prepare them for an unknown future in the 21st
                 century. In this context, many teachers intend to
                 optimise the learning process, making it more
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2024:ETL,
  author =       "Bo Liu and Liangjiao Li and Yanshan Xiao and Kai Wang
                 and Jian Hu and Junrui Liu and Qihang Chen and Ruiguang
                 Huang",
  title =        "An Efficient Transfer Learning Method with Auxiliary
                 Information",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3612930",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3612930",
  abstract =     "Transfer learning (TL) is an information reuse
                 learning tool, which can help us learn better
                 classification effect than traditional single task
                 learning, because transfer learning can share
                 information within the task-to-task model. Most TL
                 algorithms are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:SEA,
  author =       "Zhong Li and Yuxuan Zhu and Matthijs {Van Leeuwen}",
  title =        "A Survey on Explainable Anomaly Detection",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "23:1--23:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609333",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609333",
  abstract =     "In the past two decades, most research on anomaly
                 detection has focused on improving the accuracy of the
                 detection, while largely ignoring the explainability of
                 the corresponding methods and thus leaving the
                 explanation of outcomes to practitioners. As \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Jiang:2024:TLA,
  author =       "Meng Jiang",
  title =        "Transfer Learning across Graph Convolutional Networks:
                 Methods, Theory, and Applications",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "24:1--24:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617376",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617376",
  abstract =     "Graph neural networks have been widely used for
                 learning representations of nodes for many downstream
                 tasks on graph data. Existing models were designed for
                 the nodes on a single graph, which would not be able to
                 utilize information across multiple \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:CQM,
  author =       "Lizhen Wang and Vanha Tran and Thanhcong Do",
  title =        "A Clique-Querying Mining Framework for Discovering
                 High Utility Co-Location Patterns without Generating
                 Candidates",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "25:1--25:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617378",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617378",
  abstract =     "Groups of spatial features whose instances frequently
                 appear together in nearby areas are regarded as
                 prevalent co-location patterns (PCPs). Traditional PCP
                 mining ignores the significance of instances and
                 features. However, in reality, these instances
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "25",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Fu:2024:MUC,
  author =       "Zhe Fu and Xi Niu",
  title =        "Modeling Users' Curiosity in Recommender Systems",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "26:1--26:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617598",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617598",
  abstract =     "Today's recommender systems are criticized for
                 recommending items that are too obvious to arouse
                 users' interests. Therefore, the research community has
                 advocated some ``beyond accuracy'' evaluation metrics
                 such as novelty, diversity, and serendipity with
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "26",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:OGC,
  author =       "Hui-Jia Li and Yuhao Feng and Chengyi Xia and Jie
                 Cao",
  title =        "Overlapping Graph Clustering in Attributed Networks
                 via Generalized Cluster Potential Game",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "27:1--27:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597436",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597436",
  abstract =     "Overlapping graph clustering is essential to
                 understand the nature and behavior of real complex
                 systems including human interactions, technical systems
                 and transportation network. However, in addition of
                 topological structure, many real-world networked
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2024:UKG,
  author =       "Yu Liu and Zhilun Zhou and Yong Li and Depeng Jin",
  title =        "Urban Knowledge Graph Aided Mobile User Profiling",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "28:1--28:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3596604",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3596604",
  abstract =     "Nowadays, the explosive growth of personalized web
                 applications and the rapid development of artificial
                 intelligence technology have flourished the recent
                 research on mobile user profiling, i.e., inferring the
                 user profile from mobile behavioral data. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Kaibiao:2024:ANG,
  author =       "Lin, Kaibiao and Jinpo Chen and Chen, Ruicong and
                 Yang, Fan and Zhang, Yang and Lin, Min and Lu, Ping",
  title =        "Adaptive Neighbor Graph Aggregated Graph Attention
                 Network for Heterogeneous Graph Embedding",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "29:1--29:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3616377",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3616377",
  abstract =     "Graph attention network can generate effective feature
                 embedding by specifying different weights to different
                 nodes. The key of the research on heterogeneous graph
                 embedding is the way to combine its rich structural
                 information with semantic relations to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:MMG,
  author =       "Yashen Wang and Xiaoye Ouyang and Dayu Guo and
                 Xiaoling Zhu",
  title =        "{MEGA}: Meta-Graph Augmented Pre-Training Model for
                 Knowledge Graph Completion",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "30:1--30:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617379",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617379",
  abstract =     "Nowadays, a large number of Knowledge Graph Completion
                 (KGC) methods have been proposed by using embedding
                 based manners, to overcome the incompleteness problem
                 faced with knowledge graph (KG). One important recent
                 innovation in Natural Language \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "30",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2023:SDR,
  author =       "Xiang Wang and Liping Jing and Huafeng Liu and Jian
                 Yu",
  title =        "Structure-Driven Representation Learning for Deep
                 Clustering",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  publisher =    "Association for Computing Machinery (ACM)",
  pages =        "31:1--31:25",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3623400",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:07:57 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3623400",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Alam:2023:DIP,
  author =       "Md. Tanvir Alam and Chowdhury Farhan Ahmed and Md.
                 Samiullah and Carson Kai-Sang Leung",
  title =        "Discovering Interesting Patterns from Hypergraphs",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  publisher =    "Association for Computing Machinery (ACM)",
  pages =        "32:1--32:34",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3622940",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:07:57 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3622940",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:ECB,
  author =       "Fangfang Li and Zhi Liu and Junwen Duan and Xingliang
                 Mao and Heyuan Shi and Shichao Zhang",
  title =        "Exploiting Conversation-Branch-Tweet {HyperGraph}
                 Structure to Detect Misinformation on Social Media",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "33:1--33:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3610297",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3610297",
  abstract =     "The spread of misinformation on social media is a
                 serious issue that can have negative consequences for
                 public health and political stability. While detecting
                 and identifying misinformation can be challenging, many
                 attempts have been made to address this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Luo:2024:SSG,
  author =       "Xiao Luo and Wei Ju and Yiyang Gu and Zhengyang Mao
                 and Luchen Liu and Yuhui Yuan and Ming Zhang",
  title =        "Self-supervised Graph-level Representation Learning
                 with Adversarial Contrastive Learning",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "34:1--34:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624018",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624018",
  abstract =     "The recently developed unsupervised graph
                 representation learning approaches apply contrastive
                 learning into graph-structured data and achieve
                 promising performance. However, these methods mainly
                 focus on graph augmentation for positive samples, while
                 the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Miller:2024:ASP,
  author =       "Benjamin A. Miller and Zohair Shafi and Wheeler Ruml
                 and Yevgeniy Vorobeychik and Tina Eliassi-Rad and Scott
                 Alfeld",
  title =        "Attacking Shortest Paths by Cutting Edges",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "35:1--35:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3622941",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3622941",
  abstract =     "Identifying shortest paths between nodes in a network
                 is a common graph analysis problem that is important
                 for many applications involving routing of resources.
                 An adversary that can manipulate the graph structure
                 could alter traffic patterns to gain some \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Spinnato:2024:UTS,
  author =       "Francesco Spinnato and Riccardo Guidotti and Anna
                 Monreale and Mirco Nanni and Dino Pedreschi and Fosca
                 Giannotti",
  title =        "Understanding Any Time Series Classifier with a
                 Subsequence-based Explainer",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "36:1--36:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624480",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624480",
  abstract =     "The growing availability of time series data has
                 increased the usage of classifiers for this data type.
                 Unfortunately, state-of-the-art time series classifiers
                 are black-box models and, therefore, not usable in
                 critical domains such as healthcare or \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yu:2024:FSE,
  author =       "Kui Yu and Zhaolong Ling and Lin Liu and Peipei Li and
                 Hao Wang and Jiuyong Li",
  title =        "Feature Selection for Efficient Local-to-global
                 {Bayesian} Network Structure Learning",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "37:1--37:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624479",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624479",
  abstract =     "Local-to-global learning approach plays an essential
                 role in Bayesian network (BN) structure learning.
                 Existing local-to-global learning algorithms first
                 construct the skeleton of a DAG (directed acyclic
                 graph) by learning the MB (Markov blanket) or PC
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Cai:2024:RDS,
author = "Ruichu Cai and Fengzhu Wu and Zijian Li and Jie Qiao
and Wei Chen and Yuexing Hao and Hao Gu",
title = "{REST}: Debiased Social Recommendation via
Reconstructing Exposure Strategies",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "38:1--38:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3624986",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3624986",
abstract = "The recommendation system, relying on historical
observational data to model the complex relationships
among users and items, has achieved great success in
real-world applications. Selection bias is one of the
most important issues of the existing \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "38",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ye:2024:MDF,
author = "Xiaoqing Ye and Yang Sun and Dun Liu and Tianrui Li",
title = "A Multisource Data Fusion-based Heterogeneous Graph
Attention Network for Competitor Prediction",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "39:1--39:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3625101",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3625101",
abstract = "Competitor identification is an essential component of
corporate strategy. With the rapid development of
artificial intelligence, various data-mining
methodologies and frameworks have emerged to identify
competitors. In general, the competitiveness among
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "39",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:FPP,
author = "Taolin Zhang and Chengyuan Mai and Yaomin Chang and
Chuan Chen and Lin Shu and Zibin Zheng",
title = "{FedEgo}: Privacy-preserving Personalized Federated
Graph Learning with Ego-graphs",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "40:1--40:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3624017",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3624017",
abstract = "As special information carriers containing both
structure and feature information, graphs are widely
used in graph mining, e.g., Graph Neural Networks
(GNNs). However, graph data are stored separately in
multiple distributed parties in some practical
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "40",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lu:2024:CTT,
author = "Xiangkui Lu and Jun Wu and Junheng Huang and Fangyuan
Luo and Jianbo Yuan",
title = "Co-Training-Teaching: a Robust Semi-Supervised
Framework for Review-Aware Rating Regression",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "41:1--41:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3625391",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3625391",
abstract = "Review-aware Rating Regression (RaRR) suffers the
severe challenge of extreme data sparsity as the
multi-modality interactions of ratings accompanied by
reviews are costly to obtain. Although some studies of
semi-supervised rating regression are proposed
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "41",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zareie:2024:MDE,
author = "Ahmad Zareie and Rizos Sakellariou",
title = "Maximizing the Diversity of Exposure in Online Social
Networks by Identifying Users with Increased
Susceptibility to Persuasion",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "42:1--42:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3625826",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3625826",
abstract = "Individuals may have a range of opinions on
controversial topics. However, the ease of making
friendships in online social networks tends to create
groups of like-minded individuals, who propagate
messages that reinforce existing opinions and ignore
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "42",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2024:OWG,
author = "Hui Xu and Liyao Xiang and Junjie Ou and Yuting Weng
and Xinbing Wang and Chenghu Zhou",
title = "Open-World Graph Active Learning for Node
Classification",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "43:1--43:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607144",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3607144",
abstract = "The great power of Graph Neural Networks (GNNs) relies
on a large number of labeled training data, but
obtaining the labels can be costly in many cases. Graph
Active Learning (GAL) is proposed to reduce such
annotation costs, but the existing methods \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "43",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2024:SSH,
author = "Ying Chen and Siwei Qiang and Mingming Ha and Xiaolei
Liu and Shaoshuai Li and Jiabi Tong and Lingfeng Yuan
and Xiaobo Guo and Zhenfeng Zhu",
title = "Semi-Supervised Heterogeneous Graph Learning with
Multi-Level Data Augmentation",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "44:1--44:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608953",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3608953",
abstract = "In recent years, semi-supervised graph learning with
data augmentation (DA) has been the most commonly used
and best-performing method to improve model robustness
in sparse scenarios with few labeled samples. However,
most existing DA methods are based on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "44",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:RET,
author = "Huan Wang and Ruigang Liu and Chuanqi Shi and Junyang
Chen and Lei Fang and Shun Liu and Zhiguo Gong",
title = "Resisting the Edge-Type Disturbance for Link
Prediction in Heterogeneous Networks",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "45:1--45:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3614099",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3614099",
abstract = "The rapid development of heterogeneous networks has
proposed new challenges to the long-standing link
prediction problem. Existing models trained on the
verified edge samples from different types usually
learn type-specific knowledge, and their type-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "45",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:ASG,
author = "Xiaoting Li and Lingwei Chen and Dinghao Wu",
title = "Adversary for Social Good: Leveraging Adversarial
Attacks to Protect Personal Attribute Privacy",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "46:1--46:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3614098",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3614098",
abstract = "Social media has drastically reshaped the world that
allows billions of people to engage in such interactive
environments to conveniently create and share content
with the public. Among them, text data (e.g., tweets,
blogs) maintains the basic yet \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "46",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yan:2024:GMC,
author = "Bo Yan and Cheng Yang and Chuan Shi and Yong Fang and
Qi Li and Yanfang Ye and Junping Du",
title = "Graph Mining for Cybersecurity: a Survey",
journal = j-TKDD,
volume = "18",
number = "2",
pages = "47:1--47:??",
month = feb,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3610228",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3610228",
abstract = "The explosive growth of cyber attacks today, such as
malware, spam, and intrusions, has caused severe
consequences on society. Securing cyberspace has become
a great concern for organizations and governments.
Traditional machine learning based methods are
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "47",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2024:MII,
author = "Qiang Huang and Jing Ma and Jundong Li and Ruocheng
Guo and Huiyan Sun and Yi Chang",
title = "Modeling Interference for Individual Treatment Effect
Estimation from Networked Observational Data",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "48:1--48:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3628449",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3628449",
abstract = "Estimating individual treatment effect (ITE) from
observational data has attracted great interest in
recent years, which plays a crucial role in
decision-making across many high-impact domains such as
economics, medicine, and e-commerce. Most existing
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "48",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Gonzalez-Zelaya:2024:FPD,
author = "Vladimiro Gonz{\'a}lez-Zelaya and Juli{\'a}n Salas and
David Meg{\'\i}as and Paolo Missier",
title = "Fair and Private Data Preprocessing through
Microaggregation",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "49:1--49:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3617377",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3617377",
abstract = "Privacy protection for personal data and fairness in
automated decisions are fundamental requirements for
responsible Machine Learning. Both may be enforced
through data preprocessing and share a common target:
data should remain useful for a task, while \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "49",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cheng:2024:AFS,
author = "Ling Cheng and Feida Zhu and Yong Wang and Ruicheng
Liang and Huiwen Liu",
title = "From Asset Flow to Status, Action, and Intention
Discovery: Early Malice Detection in Cryptocurrency",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "50:1--50:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626102",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3626102",
abstract = "Cryptocurrency has been subject to illicit activities
probably more often than traditional financial assets
due to the pseudo-anonymous nature of its transacting
entities. An ideal detection model is expected to
achieve all three critical properties of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "50",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2024:EEF,
author = "Yimin Huang and Wanwan Wang and Xingying Zhao and
Yukun Wang and Xinyu Feng and Hao He and Ming Yao",
title = "{EFMVFL}: an Efficient and Flexible Multi-party
Vertical Federated Learning without a Third Party",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "51:1--51:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3627993",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3627993",
abstract = "Federated learning (FL) is a machine learning setting
which allows multiple participants collaboratively to
train a model under the orchestration of a server
without disclosing their local data. Vertical federated
learning (VFL) is a special structure in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "51",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Pellegrina:2024:SEB,
author = "Leonardo Pellegrina and Fabio Vandin",
title = "{SILVAN}: Estimating Betweenness Centralities with
Progressive Sampling and Non-uniform {Rademacher}
Bounds",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "52:1--52:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3628601",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3628601",
abstract = "``Sim Sala Bim!'' ---Silvan. Betweenness centrality is a
popular centrality measure with applications in several
domains and whose exact computation is impractical for
modern-sized networks. We present SILVAN, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "52",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xia:2024:HEU,
author = "Tong Xia and Yong Li and Yunhan Qi and Jie Feng and
Fengli Xu and Funing Sun and Diansheng Guo and Depeng
Jin",
title = "History-enhanced and Uncertainty-aware Trajectory
Recovery via Attentive Neural Network",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "53:1--53:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3615660",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3615660",
abstract = "A considerable amount of mobility data has been
accumulated due to the proliferation of location-based
services. Nevertheless, compared with mobility data
from transportation systems like the GPS module in
taxis, this kind of data is commonly sparse in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "53",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ao:2024:PYV,
author = "Xiang Ao and Ling Luo and Xiting Wang and Zhao Yang
and Jiun-Hung Chen and Ying Qiao and Qing He and Xing
Xie",
title = "Put Your Voice on Stage: Personalized Headline
Generation for News Articles",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "54:1--54:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3629168",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3629168",
abstract = "In this article, we study the problem of personalized
news headline generation, which aims to produce not
only concise and fact-consistent titles for news
articles but also decorate these titles as personalized
irresistible reading invitations by \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "54",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2024:GAG,
author = "Ling Chen and Jiahui Xu and Binqing Wu and Jianlong
Huang",
title = "Group-Aware Graph Neural Network for Nationwide City
Air Quality Forecasting",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "55:1--55:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3631713",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3631713",
abstract = "The problem of air pollution threatens public health.
Air quality forecasting can provide the air quality
index hours or even days later, which can help the
public to prevent air pollution in advance. Previous
works focus on citywide air quality \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "55",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liang:2024:LEI,
author = "Yunji Liang and Lei Liu and Luwen Huangfu and Sagar
Samtani and Zhiwen Yu and Daniel D. Zeng",
title = "Learning Entangled Interactions of Complex Causality
via Self-Paced Contrastive Learning",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "56:1--56:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632406",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3632406",
abstract = "Learning causality from large-scale text corpora is an
important task with numerous applications---for example,
in finance, biology, medicine, and scientific
discovery. Prior studies have focused mainly on simple
causality, which only includes one cause-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "56",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Hsu:2024:AAC,
author = "Chi-Wei Hsu and Chiao-Ting Chen and Szu-Hao Huang",
title = "Adaptive Adversarial Contrastive Learning for
Cross-Domain Recommendation",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "57:1--57:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3630259",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3630259",
abstract = "Graph-based cross-domain recommendations (CDRs) are
useful for suggesting appropriate items because of
their promising ability to extract features from
user-item interactions and transfer knowledge across
domains. Thus, the model can effectively alleviate
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "57",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ou:2024:SBO,
author = "Weitong Ou and Bo Chen and Xinyi Dai and Weinan Zhang
and Weiwen Liu and Ruiming Tang and Yong Yu",
title = "A Survey on Bid Optimization in Real-Time Bidding
Display Advertising",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "58:1--58:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3628603",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3628603",
abstract = "Real-Time Bidding (RTB) is one of the most important
forms of online advertising, where an auction is hosted
in real time to sell the individual ad impression. How
to design an automated bidding strategy in response to
the dynamic auction environment is \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "58",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ni:2024:LOS,
author = "Li Ni and Hefei Xu and Yiwen Zhang and Wenjian Luo and
Yingying Huang and Victor S. Sheng",
title = "Local Overlapping Spatial-aware Community Detection",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "59:1--59:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3634707",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3634707",
abstract = "Local spatial-aware community detection refers to
detecting a spatial-aware community for a given node
using local information. A spatial-aware community
means that nodes in the community are tightly connected
in structure, and their locations are close \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "59",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Cai:2024:GDA,
author = "Ruichu Cai and Fengzhu Wu and Zijian Li and Pengfei
Wei and Lingling Yi and Kun Zhang",
title = "Graph Domain Adaptation: a Generative View",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "60:1--60:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3631712",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3631712",
abstract = "Recent years have witnessed tremendous interest in
deep learning on graph-structured data. Due to the high
cost of collecting labeled graph-structured data,
domain adaptation is important to supervised graph
learning tasks with limited samples. However,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "60",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Su:2024:CBF,
author = "Cong Su and Guoxian Yu and Yongqing Zheng and Jun Wang
and Zhengtian Wu and Xiangliang Zhang and Carlotta
Domeniconi",
title = "Causality-Based Fair Multiple Decision by Response
Functions",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "61:1--61:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632529",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3632529",
abstract = "A recent trend of fair machine learning is to build a
decision model subjected to causality-based fairness
requirements, which concern with the causality between
sensitive attributes and decisions. Almost all (if not
all) solutions focus on a single fair \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "61",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Han:2024:LUB,
author = "Di Han and Yifan Huang and Junmin Liu and Kai Liao and
Kunling Lin",
title = "{LSAB}: User Behavioral Pattern Modeling in Sequential
Recommendation by Learning Self-Attention Bias",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "62:1--62:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632625",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3632625",
abstract = "Since the weight of a self-attention model is not
affected by the sequence interval, it can more
accurately and completely describe the user interests,
so it is widely used in processing sequential
recommendation. However, the mainstream self-attention
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "62",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Huang:2024:LCP,
author = "Shenyang Huang and Samy Coulombe and Yasmeen Hitti and
Reihaneh Rabbany and Guillaume Rabusseau",
title = "{Laplacian} Change Point Detection for Single and
Multi-view Dynamic Graphs",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "63:1--63:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3631609",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3631609",
abstract = "Dynamic graphs are rich data structures that are used
to model complex relationships between entities over
time. In particular, anomaly detection in temporal
graphs is crucial for many real-world applications such
as intrusion identification in network \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "63",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Saha:2024:PPN,
author = "Swapnil Saha and Hafiz Imtiaz",
title = "Privacy-Preserving Non-Negative Matrix Factorization
with Outliers",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "64:1--64:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632961",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3632961",
abstract = "Non-negative matrix factorization is a popular
unsupervised machine learning algorithm for extracting
meaningful features from inherently non-negative data.
Such data often contain privacy-sensitive user
information. Additionally, the dataset can contain
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "64",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2024:SGI,
author = "Ming-Chuan Yang and Guo-Wei Wong and Meng Chang Chen",
title = "Sparse Grid Imputation Using Unpaired Imprecise
Auxiliary Data: Theory and Application to {PM2.5}
Estimation",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "65:1--65:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3634751",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3634751",
abstract = "Sparse grid imputation (SGI) is a challenging problem,
as its goal is to infer the values of the entire grid
from a limited number of cells with values.
Traditionally, the problem is solved using regression
methods such as KNN and kriging, whereas in the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "65",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2024:PAD,
author = "Han Zhao and Xu Yang and Cheng Deng",
title = "Parameter-Agnostic Deep Graph Clustering",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "66:1--66:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3633783",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3633783",
abstract = "Deep graph clustering, efficiently dividing nodes into
multiple disjoint clusters in an unsupervised manner,
has become a crucial tool for analyzing ubiquitous
graph data. Existing methods have acquired impressive
clustering effects by optimizing the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "66",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:LHT,
author = "Song Wang and Yushun Dong and Xiao Huang and Chen Chen
and Jundong Li",
title = "Learning Hierarchical Task Structures for Few-shot
Graph Classification",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "67:1--67:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3635473",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3635473",
abstract = "The problem of few-shot graph classification targets
at assigning class labels for graph samples, where only
limited labeled graphs are provided for each class. To
solve the problem brought by label scarcity, recent
studies have proposed to adopt the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "67",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Rong:2024:TST,
author = "Huan Rong and Xin Yu and Tinghuai Ma and Victor S.
Sheng and Yang Zhou and Mznah Al-Rodhaan",
title = "Three-stage Transferable and Generative Crowdsourced
Comment Integration Framework Based on Zero- and
Few-shot Learning with Domain Distribution Alignment",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "68:1--68:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3636511",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3636511",
abstract = "Online shopping has become a crucial way to encourage
daily consumption, where the User-generated, or
crowdsourced product comments, can offer a broad range
of feedback on e-commerce products. As a result,
integrating critical opinions or major attitudes
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "68",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{DiPalma:2024:EVS,
author = "Luciano {Di Palma} and Yanlei Diao and Anna Liu",
title = "Efficient Version Space Algorithms for
Human-in-the-loop Model Development",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "69:1--69:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3637443",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3637443",
abstract = "When active learning (AL) is applied to help users
develop a model on a large dataset through
interactively presenting data instances for labeling,
existing AL techniques often suffer from two main
drawbacks: First, to reach high accuracy they may
require \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "69",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Tipirneni:2024:SSA,
author = "Sindhu Tipirneni and Ming Zhu and Chandan K. Reddy",
title = "{StructCoder}: Structure-Aware Transformer for Code
Generation",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "70:1--70:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3636430",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3636430",
abstract = "There has been a recent surge of interest in
automating software engineering tasks using deep
learning. This article addresses the problem of code
generation, in which the goal is to generate target
code given source code in a different language or a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "70",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Fan:2024:DDE,
author = "Wei Fan and Yanjie Fu and Shun Zheng and Jiang Bian
and Yuanchun Zhou and Hui Xiong",
title = "{DEWP}: Deep Expansion Learning for Wind Power
Forecasting",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "71:1--71:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3637552",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3637552",
abstract = "Wind is one kind of high-efficient,
environmentally-friendly, and cost-effective energy
source. Wind power, as one of the largest renewable
energy in the world, has been playing a more and more
important role in supplying electricity. Though growing
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "71",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2024:EFC,
author = "Zhe Liu and Sukumar Letchmunan",
title = "Enhanced Fuzzy Clustering for Incomplete Instance with
Evidence Combination",
journal = j-TKDD,
volume = "18",
number = "3",
pages = "72:1--72:??",
month = apr,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638061",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Mon Jan 15 11:01:47 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638061",
abstract = "Clustering incomplete instance is still a challenging
task since missing values maybe make the cluster
information ambiguous, leading to the uncertainty and
imprecision in results. This article investigates an
enhanced fuzzy clustering with evidence \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "72",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Jian:2024:SSS,
author = "Meng Jian and Yulong Bai and Jingjing Guo and Lifang
Wu",
title = "Swarm Self-supervised Hypergraph Embedding for
Recommendation",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "73:1--73:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638058",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638058",
abstract = "The information era brings both opportunities and
challenges to information services. Confronting
information overload, recommendation technology is
dedicated to filtering personalized content to meet
users' requirements. The extremely sparse interaction
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "73",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:PTQ,
author = "Wentao Wang and Huifang Ma and Yan Zhao and Zhixin
Li",
title = "Pre-training Question Embeddings for Improving
Knowledge Tracing with Self-supervised Bi-graph
Co-contrastive Learning",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "74:1--74:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638055",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638055",
abstract = "Learning high-quality vector representations
(aka.\ embeddings) of educational questions lies at the
core of knowledge tracing (KT), which defines a task of
estimating students' knowledge states by predicting the
probability that they correctly answer \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "74",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Piao:2024:IHL,
author = "Minghao Piao and Yi Sheng and Jinda Yan and Cheng Hao
Jin",
title = "Image Hash Layer Triggered {CNN} Framework for Wafer
Map Failure Pattern Retrieval and Classification",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "75:1--75:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638053",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638053",
abstract = "Recently, deep learning methods are often used in
wafer map failure pattern classification. CNN requires
less feature engineering but still needs preprocessing,
e.g., denoising and resizing. Denoising is used to
improve the quality of the input data, and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "75",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xiao:2024:TGW,
author = "Meng Xiao and Dongjie Wang and Min Wu and Kunpeng Liu
and Hui Xiong and Yuanchun Zhou and Yanjie Fu",
title = "Traceable Group-Wise Self-Optimizing Feature
Transformation Learning: a Dual Optimization
Perspective",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "76:1--76:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638059",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638059",
abstract = "Feature transformation aims to reconstruct an
effective representation space by mathematically
refining the existing features. It serves as a pivotal
approach to combat the curse of dimensionality, enhance
model generalization, mitigate data sparsity, and
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "76",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:GBT,
author = "Ximing Li and Bing Wang and Yang Wang and Meng Wang",
title = "Graph-based Text Classification by Contrastive
Learning with Text-level Graph Augmentation",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "77:1--77:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638353",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638353",
abstract = "Text Classification (TC) is a fundamental task in the
information retrieval community. Nowadays, the mainstay
TC methods are built on the deep neural networks, which
can learn much more discriminative text features than
the traditional shallow learning \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "77",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2024:CMM,
author = "Tengfei Liu and Yongli Hu and Junbin Gao and Yanfeng
Sun and Baocai Yin",
title = "Cross-modal Multiple Granularity Interactive Fusion
Network for Long Document Classification",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "78:1--78:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3631711",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3631711",
abstract = "Long Document Classification (LDC) has attracted great
attention in Natural Language Processing and achieved
considerable progress owing to the large-scale
pre-trained language models. In spite of this, as a
different problem from the traditional text \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "78",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Bozdag:2024:MMG,
author = "Mustafa Bozdag and Nurullah Sevim and Aykut
Ko{\c{c}}",
title = "Measuring and Mitigating Gender Bias in Legal
Contextualized Language Models",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "79:1--79:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3628602",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3628602",
abstract = "Transformer-based contextualized language models
constitute the state-of-the-art in several natural
language processing (NLP) tasks and applications.
Despite their utility, contextualized models can
contain human-like social biases, as their training
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "79",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:TOS,
author = "Chunkai Zhang and Maohua Lyu and Wensheng Gan and
Philip S. Yu",
title = "Totally-ordered Sequential Rules for Utility
Maximization",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "80:1--80:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3628450",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3628450",
abstract = "High-utility sequential pattern mining (HUSPM) is a
significant and valuable activity in knowledge
discovery and data analytics with many real-world
applications. In some cases, HUSPM can not provide an
excellent measure to predict what will happen.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "80",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2024:LEE,
author = "Yi Yang and Zhong-Qiu Zhao and Gongqing Wu and Xingrui
Zhuo and Qing Liu and Quan Bai and Weihua Li",
title = "A Lightweight, Effective, and Efficient Model for
Label Aggregation in Crowdsourcing",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "81:1--81:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3630102",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3630102",
abstract = "Due to the presence of noise in crowdsourced labels,
label aggregation (LA) has become a standard procedure
for post-processing these labels. LA methods estimate
true labels from crowdsourced labels by modeling worker
quality. However, most existing LA \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "81",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Feng:2024:AAG,
author = "Shengyu Feng and Baoyu Jing and Yada Zhu and Hanghang
Tong",
title = "{ArieL}: Adversarial Graph Contrastive Learning",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "82:1--82:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638054",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638054",
abstract = "Contrastive learning is an effective unsupervised
method in graph representation learning. The key
component of contrastive learning lies in the
construction of positive and negative samples. Previous
methods usually utilize the proximity of nodes in the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "82",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ding:2024:RGM,
author = "Kaize Ding and Jianling Wang and Jundong Li and James
Caverlee and Huan Liu",
title = "Robust Graph Meta-Learning for Weakly Supervised
Few-Shot Node Classification",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "83:1--83:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3630260",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3630260",
abstract = "Graph machine learning (Graph ML) models typically
require abundant labeled instances to provide
sufficient supervision signals, which is commonly
infeasible in real-world scenarios since labeled data
for newly emerged concepts (e.g., new categorizations
\ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "83",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhu:2024:ELD,
author = "Weiyao Zhu and Ou Wu and Fengguang Su and Yingjun
Deng",
title = "Exploring the Learning Difficulty of Data: Theory and
Measure",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "84:1--84:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3636512",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3636512",
abstract = "``Easy\slash hard sample'' is a popular parlance in
machine learning. Learning difficulty of samples refers
to how easy\slash hard a sample is during a learning
procedure. An increasing need of measuring learning
difficulty demonstrates its importance in machine
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "84",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Chen:2024:TUL,
author = "Wei Chen and Chao Huang and Yanwei Yu and Yongguo
Jiang and Junyu Dong",
title = "Trajectory-User Linking via Hierarchical
Spatio-Temporal Attention Networks",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "85:1--85:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3635718",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3635718",
abstract = "Trajectory-User Linking (TUL) is crucial for human
mobility modeling by linking different trajectories to
users with the exploration of complex mobility
patterns. Existing works mainly rely on the recurrent
neural framework to encode the temporal \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "85",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Liu:2024:RGN,
author = "Gang Liu and Eric Inae and Tengfei Luo and Meng
Jiang",
title = "Rationalizing Graph Neural Networks with Data
Augmentation",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "86:1--86:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638781",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638781",
abstract = "Graph rationales are representative subgraph
structures that best explain and support the graph
neural network (GNN) predictions. Graph rationalization
involves the joint identification of these subgraphs
during GNN training, resulting in improved \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "86",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Long:2024:LGM,
author = "Chao Long and Huanhuan Yuan and Junhua Fang and
Xuefeng Xian and Guanfeng Liu and Victor S. Sheng and
Pengpeng Zhao",
title = "Learning Global and Multi-granularity Local
Representation with {MLP} for Sequential
Recommendation",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "87:1--87:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638562",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638562",
abstract = "Sequential recommendation aims to predict the next
item of interest to users based on their historical
behavior data. Usually, users' global and local
preferences jointly affect the final recommendation
result in different ways. Most existing works use
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "87",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xu:2024:DLN,
author = "Hui Xu and Liyao Xiang and Xiaoying Gan and Luoyi Fu
and Xinbing Wang and Chenghu Zhou",
title = "Distributional Learning for Network Alignment with
Global Constraints",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "88:1--88:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638056",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638056",
abstract = "Network alignment, pairing corresponding nodes across
the source and target networks, plays an important role
in many data mining tasks. Extensive studies focus on
learning node embeddings across different networks in a
unified space. However, these \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "88",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:BGL,
author = "Guixian Zhang and Shichao Zhang and Guan Yuan",
title = "{Bayesian} Graph Local Extrema Convolution with
Long-tail Strategy for Misinformation Detection",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "89:1--89:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3639408",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3639408",
abstract = "It has become a cardinal task to identify fake
information (misinformation) on social media, because
it has significantly harmed the government and the
public. There are many spam bots maliciously retweeting
misinformation. This study proposes an \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "89",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Shu:2024:MIL,
author = "Senlin Shu and Deng-Bao Wang and Suqin Yuan and
Hongxin Wei and Jiuchuan Jiang and Lei Feng and
Min-Ling Zhang",
title = "Multiple-instance Learning from Triplet Comparison
Bags",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "90:1--90:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638776",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638776",
abstract = "Multiple-instance learning (MIL) solves the problem
where training instances are grouped in bags, and a
binary (positive or negative) label is provided for
each bag. Most of the existing MIL studies need fully
labeled bags for training an effective \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "90",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:PDP,
author = "Yilin Wang and Sha Zhao and Shiwei Zhao and Runze Wu
and Yuhong Xu and Jianrong Tao and Tangjie Lv and
Shijian Li and Zhipeng Hu and Gang Pan",
title = "{PU-Detector}: a {PU} Learning-based Framework for
Real Money Trading Detection in {MMORPG}",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "91:1--91:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638561",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638561",
abstract = "Massive multiplayer online role-playing games (MMORPG)
have been becoming one of the most popular and exciting
online games. In recent years, a cheating phenomenon
called real money trading (RMT) has arisen and damaged
the fantasy world in many ways. RMT \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "91",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:DSA,
author = "Yuhong Zhang and Jianqing Wu and Kui Yu and Xindong
Wu",
title = "Diverse Structure-Aware Relation Representation in
Cross-Lingual Entity Alignment",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "92:1--92:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638778",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638778",
abstract = "Cross-lingual entity alignment (CLEA) aims to find
equivalent entity pairs between knowledge graphs (KGs)
in different languages. It is an important way to
connect heterogeneous KGs and facilitate knowledge
completion. Existing methods have found that \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "92",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Geeganage:2024:SET,
author = "Dakshi Kapugama Geeganage and Yue Xu and Yuefeng Li",
title = "A Semantics-enhanced Topic Modelling Technique:
{Semantic-LDA}",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "93:1--93:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3639409",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3639409",
abstract = "Topic modelling is a beneficial technique used to
discover latent topics in text collections. But to
correctly understand the text content and generate a
meaningful topic list, semantics are important. By
ignoring semantics, that is, not attempting to
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "93",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Ren:2024:HPG,
author = "Yuyang Ren and Haonan Zhang and Luoyi Fu and Shiyu
Liang and Lei Zhou and Xinbing Wang and Xinde Cao and
Fei Long and Chenghu Zhou",
title = "{Hi-PART}: Going Beyond Graph Pooling with
Hierarchical Partition Tree for Graph-Level
Representation Learning",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "94:1--94:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3636429",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3636429",
abstract = "Graph pooling refers to the operation that maps a set
of node representations into a compact form for
graph-level representation learning. However, existing
graph pooling methods are limited by the power of the
Weisfeiler-Lehman (WL) test in the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "94",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Balalau:2024:FSM,
author = "Oana Balalau and Francesco Bonchi and T.-H. Hubert Chan
and Francesco Gullo and Mauro Sozio and Hao Xie",
title = "Finding Subgraphs with Maximum Total Density and
Limited Overlap in Weighted Hypergraphs",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "95:1--95:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3639410",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3639410",
abstract = "Finding dense subgraphs in large (hyper)graphs is a
key primitive in a variety of real-world application
domains, encompassing social network analytics, event
detection, biology, and finance. In most such
applications, one typically aims at finding \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "95",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:CDA,
author = "Jinpeng Li and Hang Yu and Zhenyu Zhang and Xiangfeng
Luo and Shaorong Xie",
title = "Concept Drift Adaptation by Exploiting Drift Type",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "96:1--96:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638777",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638777",
abstract = "Concept drift is a phenomenon where the distribution
of data streams changes over time. When this happens,
model predictions become less accurate. Hence, models
built in the past need to be re-learned for the current
data. Two design questions need to be \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "96",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Xiao:2024:NGN,
author = "Feng Xiao and Youfa Liu and Jia Shao",
title = "{NNC-GCN}: Neighbours-to-Neighbours Contrastive Graph
Convolutional Network for Semi-Supervised
Classification",
journal = j-TKDD,
volume = "18",
number = "4",
pages = "97:1--97:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3638780",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:42:57 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3638780",
abstract = "Contrastive learning (CL) is a popular learning
paradigm in deep learning, which uses contrastive
principle to learn low-dimensional embeddings, and has
been applied in Graph Neural Networks (GNNs)
successfully. Existing works of contrastive multi-view
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "97",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@Article{Lai:2024:IAM,
  author          = {Jinrong Lai and Tong Wang and Chuan Chen and Zibin Zheng},
  title           = {Information-aware Multi-view Outlier Detection},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {4},
  month           = may,
  year            = {2024},
  articleno       = {98},
  pages           = {98:1--98:??},
  doi             = {https://doi.org/10.1145/3638354},
  url             = {https://dl.acm.org/doi/10.1145/3638354},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {With the development of multi-view learning, multi-view
                     outlier detection has received increasing attention in recent
                     years. However, the current research still faces two
                     challenges: (1) The current research lacks theoretical
                     analysis tools for multi-view \ldots{}},
  bibdate         = {Tue Apr 30 06:42:57 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Tu:2024:PFL,
  author          = {Jingke Tu and Jiaming Huang and Lei Yang and Wanyu Lin},
  title           = {Personalized Federated Learning with Layer-Wise Feature
                     Transformation via Meta-Learning},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {4},
  month           = may,
  year            = {2024},
  articleno       = {99},
  pages           = {99:1--99:??},
  doi             = {https://doi.org/10.1145/3638252},
  url             = {https://dl.acm.org/doi/10.1145/3638252},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Federated learning enables multiple clients to
                     collaboratively learn machine learning models in a
                     privacy-preserving manner. However, in real-world scenarios,
                     a key challenge encountered in federated learning is the
                     statistical heterogeneity among \ldots{}},
  bibdate         = {Tue Apr 30 06:42:57 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Khan:2024:HBP,
  author          = {Mehak Khan and Gustavo B. M. Mello and Laurence Habib and
                     Paal Engelstad and Anis Yazidi},
  title           = {{HITS}-based Propagation Paradigm for Graph Neural Networks},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {4},
  month           = may,
  year            = {2024},
  articleno       = {100},
  pages           = {100:1--100:??},
  doi             = {https://doi.org/10.1145/3638779},
  url             = {https://dl.acm.org/doi/10.1145/3638779},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In this article, we present a new propagation paradigm based
                     on the principle of Hyperlink-Induced Topic Search (HITS)
                     algorithm. The HITS algorithm utilizes the concept of a
                     ``self-reinforcing'' relationship of authority-hub. Using
                     HITS, the centrality of \ldots{}},
  bibdate         = {Tue Apr 30 06:42:57 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Ghahramanian:2024:NNE,
  author =       "Pouya Ghahramanian and Sepehr Bakhshi and Hamed Bonab
                 and Fazli Can",
  title =        "A Novel Neural Ensemble Architecture for On-the-fly
                 Classification of Evolving Text Streams",
  journal =      j-TKDD,
  volume =       "18",
  number =       "4",
  pages =        "101:1--101:??",
  month =        may,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3639054",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Apr 30 06:42:57 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3639054",
  abstract =     "We study on-the-fly classification of evolving text
                 streams in which the relation between the input data
                 and target labels changes over time---i.e., ``concept
                 drift.'' These variations decrease the model's
                 performance, as predictions become less accurate over
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "101",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:DDG,
  author          = {Ying Zhang and Zhiqiang Zhao and Zhuo Feng},
  title           = {{diGRASS}: Directed Graph Spectral Sparsification via
                     Spectrum-Preserving Symmetrization},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {4},
  month           = may,
  year            = {2024},
  articleno       = {102},
  pages           = {102:1--102:??},
  doi             = {https://doi.org/10.1145/3639568},
  url             = {https://dl.acm.org/doi/10.1145/3639568},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Recent spectral graph sparsification research aims to
                     construct ultra-sparse subgraphs for preserving the original
                     graph spectral (structural) properties, such as the first few
                     Laplacian eigenvalues and eigenvectors, which has led to the
                     development of a \ldots{}},
  bibdate         = {Tue Apr 30 06:42:57 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): the preceding entry (Zhang:2024:DDG) is article 102 and this
%%% one is article 105; articles 103 and 104 do not appear in this stretch of
%%% the file --- confirm against the publisher's tables of contents.
@Article{Pan:2024:EDD,
  author          = {Yicheng Pan and Yifan Zhang and Xinrui Jiang and Meng Ma and
                     Ping Wang},
  title           = {{EffCause}: Discover Dynamic Causal Relationships Efficiently
                     from Time-Series},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {105},
  pages           = {105:1--105:??},
  doi             = {https://doi.org/10.1145/3640818},
  url             = {https://dl.acm.org/doi/10.1145/3640818},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Since the proposal of Granger causality, many researchers
                     have followed the idea and developed extensions to the
                     original algorithm. The classic Granger causality test aims
                     to detect the existence of the static causal relationship.
                     Notably, a fundamental \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Yao:2024:ALG,
  author          = {Kai-Lang Yao and Wu-Jun Li},
  title           = {Asymmetric Learning for Graph Neural Network based Link
                     Prediction},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {106},
  pages           = {106:1--106:??},
  doi             = {https://doi.org/10.1145/3640347},
  url             = {https://dl.acm.org/doi/10.1145/3640347},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Link prediction is a fundamental problem in many graph-based
                     applications, such as protein-protein interaction prediction.
                     Recently, graph neural network (GNN) has been widely used for
                     link prediction. However, existing GNN-based link prediction
                     (GNN-LP) \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Guo:2024:MTL,
  author          = {Xiaobo Guo and Mingming Ha and Xuewen Tao and Shaoshuai Li
                     and Youru Li and Zhenfeng Zhu and Zhiyong Shen and Li Ma},
  title           = {Multi-Task Learning with Sequential Dependence Toward
                     Industrial Applications: a Systematic Formulation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {107},
  pages           = {107:1--107:??},
  doi             = {https://doi.org/10.1145/3640468},
  url             = {https://dl.acm.org/doi/10.1145/3640468},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Multi-task learning (MTL) is widely used in the online
                     recommendation and financial services for multi-step
                     conversion estimation, but current works often overlook the
                     sequential dependence among tasks. In particular, sequential
                     dependence multi-task \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Zhang:2024:PAT,
  author          = {Lei Zhang and Yong Liu and Zhiwei Zeng and Yiming Cao and
                     Xingyu Wu and Yonghui Xu and Zhiqi Shen and Lizhen Cui},
  title           = {Package Arrival Time Prediction via Knowledge Distillation
                     Graph Neural Network},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {108},
  pages           = {108:1--108:??},
  doi             = {https://doi.org/10.1145/3643033},
  url             = {https://dl.acm.org/doi/10.1145/3643033},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Accurately estimating packages' arrival time in e-commerce
                     can enhance users' shopping experience and improve the
                     placement rate of products. This problem is often formalized
                     as an Origin-Destination (OD)-based ETA (i.e., estimated time
                     of arrival) \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Kuo:2024:CAG,
  author          = {Chuan-Wei Kuo and Bo-Yu Chen and Wen-Chih Peng and Chih-Chieh
                     Hung and Hsin-Ning Su},
  title           = {Correlation-aware Graph Data Augmentation with Implicit and
                     Explicit Neighbors},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {109},
  pages           = {109:1--109:??},
  doi             = {https://doi.org/10.1145/3638057},
  url             = {https://dl.acm.org/doi/10.1145/3638057},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In recent years, there has been a significant surge in
                     commercial demand for citation graph-based tasks, such as
                     patent analysis, social network analysis, and recommendation
                     systems. Graph Neural Networks (GNNs) are widely used for
                     these tasks due to \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Duan:2024:ACT,
  author          = {Mingxing Duan and Kenli Li and Weinan Zhang and Jiarui Qin
                     and Bin Xiao},
  title           = {Attacking Click-through Rate Predictors via Generating
                     Realistic Fake Samples},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {110},
  pages           = {110:1--110:??},
  doi             = {https://doi.org/10.1145/3643685},
  url             = {https://dl.acm.org/doi/10.1145/3643685},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {How to construct imperceptible (realistic) fake samples is
                     critical in adversarial attacks. Due to the sample feature
                     diversity of a recommender system (containing both discrete
                     and continuous features), traditional gradient-based
                     adversarial attack \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Khodabandehlou:2024:FUF,
  author          = {Samira Khodabandehlou and Alireza Hashemi Golpayegani},
  title           = {{FiFrauD}: Unsupervised Financial Fraud Detection in Dynamic
                     Graph Streams},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {111},
  pages           = {111:1--111:??},
  doi             = {https://doi.org/10.1145/3641857},
  url             = {https://dl.acm.org/doi/10.1145/3641857},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Given a stream of financial transactions between traders in
                     an e-market, how can we accurately detect fraudulent traders
                     and suspicious behaviors in real time? Despite the efforts
                     made in detecting these fraudsters, this field still faces
                     serious \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Sun:2024:PEC,
  author          = {Jianshan Sun and Suyuan Mei and Kun Yuan and Yuanchun Jiang
                     and Jie Cao},
  title           = {Prerequisite-Enhanced Category-Aware Graph Neural Networks
                     for Course Recommendation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {112},
  pages           = {112:1--112:??},
  doi             = {https://doi.org/10.1145/3643644},
  url             = {https://dl.acm.org/doi/10.1145/3643644},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {The rapid development of Massive Open Online Courses (MOOCs)
                     platforms has created an urgent need for an efficient
                     personalized course recommender system that can assist
                     learners of all backgrounds and levels of knowledge in
                     selecting appropriate courses. \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Deng:2024:DGT,
  author          = {Songgaojun Deng and Olivier Sprangers and Ming Li and
                     Sebastian Schelter and Maarten de Rijke},
  title           = {Domain Generalization in Time Series Forecasting},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {113},
  pages           = {113:1--113:??},
  doi             = {https://doi.org/10.1145/3643035},
  url             = {https://dl.acm.org/doi/10.1145/3643035},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Domain generalization aims to design models that can
                     effectively generalize to unseen target domains by learning
                     from observed source domains. Domain generalization poses a
                     significant challenge for time series data, due to varying
                     data distributions and \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Huang:2024:TTS,
  author          = {Gengsen Huang and Wensheng Gan and Philip S. Yu},
  title           = {{TaSPM}: Targeted Sequential Pattern Mining},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {114},
  pages           = {114:1--114:??},
  doi             = {https://doi.org/10.1145/3639827},
  url             = {https://dl.acm.org/doi/10.1145/3639827},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Sequential pattern mining (SPM) is an important technique in
                     the field of pattern mining, which has many applications in
                     reality. Although many efficient SPM algorithms have been
                     proposed, there are few studies that can focus on targeted
                     tasks. Targeted \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Zhu:2024:NTS,
  author          = {Yichen Zhu and Bo Jiang and Haiming Jin and Mengtian Zhang
                     and Feng Gao and Jianqiang Huang and Tao Lin and Xinbing
                     Wang},
  title           = {Networked Time-series Prediction with Incomplete Data via
                     Generative Adversarial Network},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {115},
  pages           = {115:1--115:??},
  doi             = {https://doi.org/10.1145/3643822},
  url             = {https://dl.acm.org/doi/10.1145/3643822},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {A networked time series (NETS) is a family of time series on
                     a given graph, one for each node. It has a wide range of
                     applications from intelligent transportation to environment
                     monitoring to smart grid management. An important task in
                     such applications \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Rong:2024:CRC,
  author =       "Huan Rong and Minfeng Qian and Tinghuai Ma and Di Jin
                 and Victor S. Sheng",
  title =        "{CoBjeason}: Reasoning Covered Object in Image by
                 Multi-Agent Collaboration Based on Informed Knowledge
                 Graph",
  journal =      j-TKDD,
  volume =       "18",
  number =       "5",
  pages =        "116:1--116:??",
  month =        jun,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3643565",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Apr 30 06:42:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3643565",
  abstract =     "Object detection is a widely studied problem in
                 existing works. However, in this paper, we turn to a
                 more challenging problem of ``Covered Object
                 Reasoning'', aimed at reasoning the category label of
                 target object in the given image particularly when it
                 has \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "116",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Pandey:2024:XDR,
  author          = {Pradumn Kumar Pandey and Aikta Arya and Akrati Saxena},
  title           = {{X-distribution}: Retraceable Power-law Exponent of Complex
                     Networks},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {117},
  pages           = {117:1--117:??},
  doi             = {https://doi.org/10.1145/3639413},
  url             = {https://dl.acm.org/doi/10.1145/3639413},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Network modeling has been explored extensively by means of
                     theoretical analysis as well as numerical simulations for
                     Network Reconstruction (NR). The network reconstruction
                     problem requires the estimation of the power-law exponent
                     ($ \gamma $) of a given input \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Moradnia:2024:SCP,
  author          = {Sajedeh Moradnia and Mousa Golalizadeh},
  title           = {Supervised Clustering of {Persian} Handwritten Images Using
                     Regularization and Dimension Reduction Methods},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {118},
  pages           = {118:1--118:??},
  doi             = {https://doi.org/10.1145/3638060},
  url             = {https://dl.acm.org/doi/10.1145/3638060},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Clustering, as a fundamental exploratory data technique, not
                     only is used to discover patterns and structures in complex
                     datasets but also is utilized to group variables in
                     high-dimensional data analysis. Dimension reduction through
                     clustering helps \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Chen:2024:GTS,
  author          = {Hongjie Chen and Hoda Eldardiry},
  title           = {Graph Time-series Modeling in Deep Learning: a Survey},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {119},
  pages           = {119:1--119:??},
  doi             = {https://doi.org/10.1145/3638534},
  url             = {https://dl.acm.org/doi/10.1145/3638534},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Time-series and graphs have been extensively studied for
                     their ubiquitous existence in numerous domains. Both topics
                     have been separately explored in the field of deep learning.
                     For time-series modeling, recurrent neural networks or
                     convolutional neural \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Poulakis:2024:SAM,
  author          = {Yannis Poulakis and Christos Doulkeridis and Dimosthenis
                     Kyriazis},
  title           = {A Survey on {AutoML} Methods and Systems for Clustering},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {120},
  pages           = {120:1--120:??},
  doi             = {https://doi.org/10.1145/3643564},
  url             = {https://dl.acm.org/doi/10.1145/3643564},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Automated Machine Learning (AutoML) aims to identify the
                     best-performing machine learning algorithm along with its
                     input parameters for a given dataset and a specific machine
                     learning task. This is a challenging problem, as the process
                     of finding the best \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): the last author's family name is taken to be the compound
%%% surname ``Valdovinos Rosas'' (two-surname convention) and is braced so
%%% that BibTeX does not treat ``Rosas'' alone as the family name ---
%%% confirm against the publisher's author record.
@Article{Rodriguez-Gonzalez:2024:XFN,
  author =       "Ansel Y. Rodr{\'\i}guez-Gonz{\'a}lez and Ram{\'o}n
                 Aranda and Miguel {\'A}. {\'A}lvarez-Carmona and Angel
                 D{\'\i}az-Pacheco and Rosa Mar{\'\i}a {Valdovinos
                 Rosas}",
  title =        "{X-FSPMiner}: a Novel Algorithm for Frequent Similar
                 Pattern Mining",
  journal =      j-TKDD,
  volume =       "18",
  number =       "5",
  pages =        "121:1--121:??",
  month =        jun,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3643820",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Apr 30 06:42:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3643820",
  abstract =     "Frequent similar pattern mining (FSP mining) allows
                 for finding frequent patterns hidden from the classical
                 approach. However, the use of similarity functions
                 implies more computational effort, necessitating the
                 development of more efficient algorithms \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "121",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Luan:2024:MIL,
  author          = {Tianxiang Luan and Shilin Gu and Xijia Tang and Wenzhang
                     Zhuge and Chenping Hou},
  title           = {Multi-Instance Learning with One Side Label Noise},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {122},
  pages           = {122:1--122:??},
  doi             = {https://doi.org/10.1145/3644076},
  url             = {https://dl.acm.org/doi/10.1145/3644076},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Multi-instance Learning (MIL) is a popular learning paradigm
                     arising from many real applications. It assigns a label to a
                     set of instances, which is called a bag, and the bag's label
                     is determined by the instances within it. A bag is positive
                     if and only \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Qin:2024:MWP,
  author          = {Wei Qin and Xiaowei Wang and Zhenzhen Hu and Lei Wang and
                     Yunshi Lan and Richang Hong},
  title           = {Math Word Problem Generation via Disentangled Memory
                     Retrieval},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {123},
  pages           = {123:1--123:??},
  doi             = {https://doi.org/10.1145/3639569},
  url             = {https://dl.acm.org/doi/10.1145/3639569},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {The task of math word problem (MWP) generation, which
                     generates an MWP given an equation and relevant topic words,
                     has increasingly attracted researchers' attention. In this
                     work, we introduce a simple memory retrieval module to search
                     related training \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Wang:2024:VHL,
  author          = {Haobo Wang and Cheng Peng and Hede Dong and Lei Feng and
                     Weiwei Liu and Tianlei Hu and Ke Chen and Gang Chen},
  title           = {On the Value of Head Labels in Multi-Label Text
                     Classification},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {124},
  pages           = {124:1--124:??},
  doi             = {https://doi.org/10.1145/3643853},
  url             = {https://dl.acm.org/doi/10.1145/3643853},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {A formidable challenge in the multi-label text classification
                     (MLTC) context is that the labels often exhibit a long-tailed
                     distribution, which typically prevents deep MLTC models from
                     obtaining satisfactory performance. To alleviate this
                     problem, most \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Hu:2024:TDP,
  author          = {Wentao Hu and Hui Fang},
  title           = {Towards Differential Privacy in Sequential Recommendation: a
                     Noisy Graph Neural Network Approach},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {125},
  pages           = {125:1--125:??},
  doi             = {https://doi.org/10.1145/3643821},
  url             = {https://dl.acm.org/doi/10.1145/3643821},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {With increasing frequency of high-profile privacy breaches in
                     various online platforms, users are becoming more concerned
                     about their privacy. And recommender system is the core
                     component of online platforms for providing personalized
                     service, \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Ni:2024:LCD,
  author          = {Li Ni and Rui Ye and Wenjian Luo and Yiwen Zhang},
  title           = {Local Community Detection in Multiple Private Networks},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {126},
  pages           = {126:1--126:??},
  doi             = {https://doi.org/10.1145/3644078},
  url             = {https://dl.acm.org/doi/10.1145/3644078},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Individuals are often involved in multiple online social
                     networks. Considering that owners of these networks are
                     unwilling to share their networks, some global algorithms
                     combine information from multiple networks to detect all
                     communities in multiple \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Sui:2024:EDG,
  author          = {Yongduo Sui and Wenyu Mao and Shuyao Wang and Xiang Wang and
                     Jiancan Wu and Xiangnan He and Tat-Seng Chua},
  title           = {Enhancing Out-of-distribution Generalization on Graphs via
                     Causal Attention Learning},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {127},
  pages           = {127:1--127:??},
  doi             = {https://doi.org/10.1145/3644392},
  url             = {https://dl.acm.org/doi/10.1145/3644392},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In graph classification, attention- and pooling-based graph
                     neural networks (GNNs) predominate to extract salient
                     features from the input graph and support the prediction.
                     They mostly follow the paradigm of ``learning to attend,''
                     which maximizes the mutual \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Sun:2024:IML,
  author          = {Kai Sun and Huajie Jiang and Yongli Hu and Baocai Yin},
  title           = {Incorporating Multi-Level Sampling with Adaptive Aggregation
                     for Inductive Knowledge Graph Completion},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {128},
  pages           = {128:1--128:??},
  doi             = {https://doi.org/10.1145/3644822},
  url             = {https://dl.acm.org/doi/10.1145/3644822},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In recent years, Graph Neural Networks (GNNs) have achieved
                     unprecedented success in handling graph-structured data,
                     thereby driving the development of numerous GNN-oriented
                     techniques for inductive knowledge graph completion (KGC). A
                     key limitation of \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
@Article{Yao:2024:TLP,
  author          = {Rujing Yao and Ou Wu},
  title           = {A Taxonomy for Learning with Perturbation and Algorithms},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {18},
  number          = {5},
  month           = jun,
  year            = {2024},
  articleno       = {129},
  pages           = {129:1--129:??},
  doi             = {https://doi.org/10.1145/3644391},
  url             = {https://dl.acm.org/doi/10.1145/3644391},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Weighting strategy prevails in machine learning. For example,
                     a common approach in robust machine learning is to exert low
                     weights on samples which are likely to be noisy or quite
                     hard. This study summarizes another less-explored strategy,
                     namely, \ldots{}},
  bibdate         = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 5 (June 2024), article 130.
@article{Han:2024:GBM,
  author = {Yuehui Han},
  title = {Generation-based Multi-view Contrast for Self-supervised
    Graph Representation Learning},
  journal = j-TKDD,
  volume = {18},
  number = {5},
  pages = {130:1--130:??},
  articleno = {130},
  month = jun,
  year = {2024},
  doi = {https://doi.org/10.1145/3645095},
  url = {https://dl.acm.org/doi/10.1145/3645095},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Graph contrastive learning has made remarkable achievements
    in the self-supervised representation learning of graph-structured
    data. By employing perturbation function (i.e., perturbation on the
    nodes or edges of graph), most graph contrastive learning \ldots{}},
  bibdate = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 5 (June 2024), article 131.
@article{Singh:2024:MTH,
  author = {Kuldeep Singh and Bhaskar Biswas},
  title = {Mining Top-$k$ High On-shelf Utility Itemsets Using Novel
    Threshold Raising Strategies},
  journal = j-TKDD,
  volume = {18},
  number = {5},
  pages = {131:1--131:??},
  articleno = {131},
  month = jun,
  year = {2024},
  doi = {https://doi.org/10.1145/3645115},
  url = {https://dl.acm.org/doi/10.1145/3645115},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {High utility itemsets (HUIs) mining is an emerging area of
    data mining which discovers sets of items generating a high profit
    from transactional datasets. In recent years, several algorithms have
    been proposed for this task. However, most of them do not \ldots{}},
  bibdate = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 5 (June 2024), article 132.
@article{Pingi:2024:CGA,
  author = {Sharon Torao Pingi and Richi Nayak and Md Abul Bashar},
  title = {Conditional Generative Adversarial Network for Early
    Classification of Longitudinal Datasets Using an Imputation Approach},
  journal = j-TKDD,
  volume = {18},
  number = {5},
  pages = {132:1--132:??},
  articleno = {132},
  month = jun,
  year = {2024},
  doi = {https://doi.org/10.1145/3644821},
  url = {https://dl.acm.org/doi/10.1145/3644821},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Early classification of longitudinal data remains an active
    area of research today. The complexity of these datasets and the high
    rates of missing data caused by irregular sampling present data-level
    challenges for the Early Longitudinal Data \ldots{}},
  bibdate = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 5 (June 2024), article 133.
@article{Dornaika:2024:SIS,
  author = {Fadi Dornaika and Zoulfikar Ibrahim and Alirezah Bosaghzadeh},
  title = {Scalable and Inductive Semi-supervised Classifier with Sample
    Weighting Based on Graph Topology},
  journal = j-TKDD,
  volume = {18},
  number = {5},
  pages = {133:1--133:??},
  articleno = {133},
  month = jun,
  year = {2024},
  doi = {https://doi.org/10.1145/3643645},
  url = {https://dl.acm.org/doi/10.1145/3643645},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Recently, graph-based semi-supervised learning (GSSL) has
    garnered significant interest in the realms of machine learning and
    pattern recognition. Although some of the proposed methods have made
    some progress, there are still some shortcomings that need \ldots{}},
  bibdate = {Tue Apr 30 06:42:59 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 134.
@article{Yang:2024:SBA,
  author = {Yang Yang and Feifei Wang and Enqiang Zhu and Fei Jiang and
    Wen Yao},
  title = {Social Behavior Analysis in Exclusive Enterprise Social
    Networks by {FastHAND}},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {134:1--134:??},
  articleno = {134},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3646552},
  url = {https://dl.acm.org/doi/10.1145/3646552},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {There is an emerging trend in the Chinese automobile
    industries that automakers are introducing exclusive enterprise
    social networks (EESNs) to expand sales and provide after-sale
    services. The traditional online social networks (OSNs) and
    enterprise \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 135.
%%% In the abstract, the variable k in "k-truss" and "k-core" is typeset
%%% in math mode and joined directly to its hyphenated noun.
@Article{Chen:2024:BTB,
author = "Huiping Chen and Alessio Conte and Roberto Grossi and
Grigorios Loukides and Solon P. Pissis and Michelle
Sweering",
title = "On Breaking Truss-based and Core-based Communities",
journal = j-TKDD,
volume = "18",
number = "6",
pages = "135:1--135:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3644077",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:43:01 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3644077",
abstract = "We introduce the general problem of identifying a
smallest edge subset of a given graph whose deletion
makes the graph community-free. We consider this
problem under two community notions that have attracted
significant attention: $k$-truss and $k$-core. We
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "135",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD volume 18, number 6 (July 2024), article 136.
@article{Li:2024:ISD,
  author = {Xuefei Li and Huiwei Zhou and Weihong Yao and Wenchu Li and
    Baojie Liu and Yingyu Lin},
  title = {Intricate Spatiotemporal Dependency Learning for Temporal
    Knowledge Graph Reasoning},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {136:1--136:??},
  articleno = {136},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3648366},
  url = {https://dl.acm.org/doi/10.1145/3648366},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Knowledge Graph (KG) reasoning has been an interesting topic
    in recent decades. Most current researches focus on predicting the
    missing facts for incomplete KG. Nevertheless, Temporal KG (TKG)
    reasoning, which is to forecast future facts, still faces with
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 137.
@article{Zheng:2024:PPA,
  author = {Yimei Zheng and Caiyan Jia},
  title = {{ProtoMGAE}: Prototype-Aware Masked Graph Auto-Encoder for
    Graph Representation Learning},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {137:1--137:??},
  articleno = {137},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649143},
  url = {https://dl.acm.org/doi/10.1145/3649143},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Graph self-supervised representation learning has gained
    considerable attention and demonstrated remarkable efficacy in
    extracting meaningful representations from graphs, particularly in
    the absence of labeled data. Two representative methods in this
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 138.
@article{Chen:2024:FAG,
  author = {April Chen and Ryan A. Rossi and Namyong Park and Puja Trivedi
    and Yu Wang and Tong Yu and Sungchul Kim and Franck Dernoncourt and
    Nesreen K. Ahmed},
  title = {Fairness-Aware Graph Neural Networks: a Survey},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {138:1--138:??},
  articleno = {138},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649142},
  url = {https://dl.acm.org/doi/10.1145/3649142},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Graph Neural Networks (GNNs) have become increasingly
    important due to their representational power and state-of-the-art
    predictive performance on many fundamental learning tasks. Despite
    this success, GNNs suffer from fairness issues that arise as a
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 139.
%%% In the abstract, the parenthetical break after "(GCNs)" is a dash,
%%% written with the TeX em-dash ligature rather than a single hyphen.
@Article{Zhang:2024:BSB,
author = "Acong Zhang and Jincheng Huang and Ping Li and Kai
Zhang",
title = "Building Shortcuts between Distant Nodes with Biaffine
Mapping for Graph Convolutional Networks",
journal = j-TKDD,
volume = "18",
number = "6",
pages = "139:1--139:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3650113",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:43:01 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3650113",
abstract = "Multiple recent studies show a paradox in graph
convolutional networks (GCNs)---that is, shallow
architectures limit the capability of learning
information from high-order neighbors, whereas deep
architectures suffer from over-smoothing or
over-squashing. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "139",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD volume 18, number 6 (July 2024), article 140.
@article{Chen:2024:DGN,
  author = {Zhe Chen and Aixin Sun},
  title = {{DP-GCN}: Node Classification by Connectivity and Local
    Topology Structure on Real-World Network},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {140:1--140:??},
  articleno = {140},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649460},
  url = {https://dl.acm.org/doi/10.1145/3649460},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Node classification is to predict the class label of a node
    by analyzing its properties and interactions in a network. We note
    that many existing solutions for graph-based node classification only
    consider node connectivity but not the node's local \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 141.
@article{Ali:2024:SSS,
  author = {Sarwan Ali and Muhammad Ahmad and Maham Anwer Beg and Imdad
    Ullah Khan and Safiullah Faizullah and Muhammad Asad Khan},
  title = {{SsAG}: Summarization and Sparsification of Attributed Graphs},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {141:1--141:??},
  articleno = {141},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3651619},
  url = {https://dl.acm.org/doi/10.1145/3651619},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Graph summarization has become integral for managing and
    analyzing large-scale graphs in diverse real-world applications,
    including social networks, biological networks, and communication
    networks. Existing methods for graph summarization often face
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 142.
@article{Song:2024:MSM,
  author = {Derun Song and Enneng Yang and Guibing Guo and Li Shen and
    Linying Jiang and Xingwei Wang},
  title = {Multi-Scenario and Multi-Task Aware Feature Interaction for
    Recommendation System},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {142:1--142:??},
  articleno = {142},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3651312},
  url = {https://dl.acm.org/doi/10.1145/3651312},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Multi-scenario and multi-task recommendation can use various
    feedback behaviors of users in different scenarios to learn users'
    preferences and then make recommendations, which has attracted
    attention. However, the existing work ignores feature \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 143.
@article{Connor:2024:NZN,
  author = {Richard Connor and Lucia Vadicamo},
  title = {{nSimplex Zen}: a Novel Dimensionality Reduction for
    {Euclidean} and {Hilbert} Spaces},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {143:1--143:??},
  articleno = {143},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3647642},
  url = {https://dl.acm.org/doi/10.1145/3647642},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Dimensionality reduction techniques map values from a high
    dimensional space to one with a lower dimension. The result is a
    space which requires less physical memory and has a faster distance
    calculation. These techniques are widely used where required
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 144.
@article{Ji:2024:CFM,
  author = {Taoran Ji and Nathan Self and Kaiqun Fu and Zhiqian Chen and
    Naren Ramakrishnan and Chang-Tien Lu},
  title = {Citation Forecasting with Multi-Context Attention-Aided
    Dependency Modeling},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {144:1--144:??},
  articleno = {144},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649140},
  url = {https://dl.acm.org/doi/10.1145/3649140},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Forecasting citations of scientific patents and publications
    is a crucial task for understanding the evolution and development of
    technological domains and for foresight into emerging technologies.
    By construing citations as a time series, the task can be \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 145.
@article{Zhou:2024:NEP,
  author = {Houquan Zhou and Shenghua Liu and Huawei Shen and Xueqi Cheng},
  title = {Node Embedding Preserving Graph Summarization},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {145:1--145:??},
  articleno = {145},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649505},
  url = {https://dl.acm.org/doi/10.1145/3649505},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Graph summarization is a useful tool for analyzing
    large-scale graphs. Some works tried to preserve original node
    embeddings encoding rich structural information of nodes on the
    summary graph. However, their algorithms are designed heuristically
    and not \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 146.
@article{Theocharidis:2024:ACA,
  author = {Konstantinos Theocharidis and Panagiotis Karras and Manolis
    Terrovitis and Spiros Skiadopoulos and Hady W. Lauw},
  title = {Adaptive Content-Aware Influence Maximization via Online
    Learning to Rank},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {146:1--146:??},
  articleno = {146},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3651987},
  url = {https://dl.acm.org/doi/10.1145/3651987},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {How can we adapt the composition of a post over a series of
    rounds to make it more appealing in a social network? Techniques that
    progressively learn how to make a fixed post more influential over
    rounds have been studied in the context of the Influence \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 147.
@article{Paterakis:2024:DWR,
  author = {George Paterakis and Stefanos Fafalios and Paulos
    Charonyktakis and Vassilis Christophides and Ioannis Tsamardinos},
  title = {Do We Really Need Imputation in {AutoML} Predictive Modeling?},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {147:1--147:??},
  articleno = {147},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3643643},
  url = {https://dl.acm.org/doi/10.1145/3643643},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Numerous real-world data contain missing values, while in
    contrast, most Machine Learning (ML) algorithms assume complete
    datasets. For this reason, several imputation algorithms have been
    proposed to predict and fill in the missing values. Given the
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 148.
@article{Zhang:2024:DDL,
  author = {Chi Zhang and Linhao Cai and Meng Chen and Xiucheng Li and Gao
    Cong},
  title = {{DeepMeshCity}: a Deep Learning Model for Urban Grid
    Prediction},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {148:1--148:??},
  articleno = {148},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3652859},
  url = {https://dl.acm.org/doi/10.1145/3652859},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Urban grid prediction can be applied to many classic
    spatial-temporal prediction tasks such as air quality prediction,
    crowd density prediction, and traffic flow prediction, which is of
    great importance to smart city building. In light of its practical
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 149.
@article{Yang:2024:MSM,
  author = {Hongwei Yang and Hui He and Weizhe Zhang and Yan Wang and Lin
    Jing},
  title = {Multi-Source and Multi-modal Deep Network Embedding for
    Cross-Network Node Classification},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {149:1--149:??},
  articleno = {149},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3653304},
  url = {https://dl.acm.org/doi/10.1145/3653304},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {In recent years, to address the issue of networked data
    sparsity in node classification tasks, cross-network node
    classification (CNNC) leverages the richer information from a source
    network to enhance the performance of node classification in the
    target \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 150.
@article{Rong:2024:LGT,
  author = {Can Rong and Zhicheng Liu and Jingtao Ding and Yong Li},
  title = {Learning to Generate Temporal Origin-destination Flow Based-on
    Urban Regional Features and Traffic Information},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {150:1--150:??},
  articleno = {150},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649141},
  url = {https://dl.acm.org/doi/10.1145/3649141},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Origin-destination (OD) flow contains population mobility
    information between every two regions in the city, which is of great
    value in urban planning and transportation management. Nevertheless,
    the collection of OD flow data is extremely difficult due \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 151.
@article{Gu:2024:NJC,
  author = {Zhibin Gu and Songhe Feng and Zhendong Li and Jiazheng Yuan
    and Jun Liu},
  title = {{NOODLE}: Joint Cross-View Discrepancy Discovery and
    High-Order Correlation Detection for Multi-View Subspace Clustering},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {151:1--151:??},
  articleno = {151},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3653305},
  url = {https://dl.acm.org/doi/10.1145/3653305},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Benefiting from the effective exploration of the valuable
    topological pair-wise relationship of data points across multiple
    views, multi-view subspace clustering (MVSC) has received increasing
    attention in recent years. However, we observe that existing
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 152.
@article{Wang:2024:DPF,
  author = {Yuhan Wang and Qing Xie and Mengzi Tang and Lin Li and
    Jingling Yuan and Yongjian Liu},
  title = {A Dual Perspective Framework of Knowledge-correlation for
    Cross-domain Recommendation},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {152:1--152:??},
  articleno = {152},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3652520},
  url = {https://dl.acm.org/doi/10.1145/3652520},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Recommender System provides users with online services in a
    personalized way. The performance of traditional recommender systems
    may deteriorate because of problems such as cold-start and data
    sparsity. Cross-domain Recommendation System utilizes the \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 153.
@article{Zhao:2024:DER,
  author = {Chen Zhao and Feng Mi and Xintao Wu and Kai Jiang and Latifur
    Khan and Feng Chen},
  title = {Dynamic Environment Responsive Online Meta-Learning with
    Fairness Awareness},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {153:1--153:??},
  articleno = {153},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3648684},
  url = {https://dl.acm.org/doi/10.1145/3648684},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {The fairness-aware online learning framework has emerged as
    a potent tool within the context of continuous lifelong learning. In
    this scenario, the learner's objective is to progressively acquire
    new tasks as they arrive over time, while also guaranteeing
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 154.
@article{Zhao:2024:HCN,
  author = {Hong Zhao and Zhengyu Li and Wenwei He and Yan Zhao},
  title = {Hierarchical Convolutional Neural Network with Knowledge
    Complementation for Long-Tailed Classification},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {154:1--154:??},
  articleno = {154},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3653717},
  url = {https://dl.acm.org/doi/10.1145/3653717},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Existing methods based on transfer learning leverage
    auxiliary information to help tail generalization and improve the
    performance of the tail classes. However, they cannot fully exploit
    the relationships between auxiliary information and tail classes and
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 155.
@article{Sahebi:2024:MMP,
  author = {Sherry Sahebi and Mengfan Yao and Siqian Zhao and Reza Feyzi
    Behnagh},
  title = {{MoMENt}: Marked Point Processes with Memory-Enhanced Neural
    Networks for User Activity Modeling},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {155:1--155:??},
  articleno = {155},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3649504},
  url = {https://dl.acm.org/doi/10.1145/3649504},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Marked temporal point process models (MTPPs) aim to model
    event sequences and event markers (associated features) in continuous
    time. These models have been applied to various application domains
    where capturing event dynamics in continuous time is \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 156.
%%% NOTE(review): the fifth author is entered as "U Kang" with no period,
%%% on the understanding that "U" is a complete given name and not an
%%% abbreviated initial --- confirm against the publisher's author list.
@Article{Choe:2024:RBT,
author = "Minyoung Choe and Jaemin Yoo and Geon Lee and Woonsung
Baek and U Kang and Kijung Shin",
title = "Representative and Back-In-Time Sampling from
Real-world Hypergraphs",
journal = j-TKDD,
volume = "18",
number = "6",
pages = "156:1--156:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3653306",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:43:01 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3653306",
abstract = "Graphs are widely used for representing pairwise
interactions in complex systems. Since such real-world
graphs are large and often evergrowing, sampling
subgraphs is useful for various purposes, including
simulation, visualization, stream processing,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "156",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
%%% TKDD volume 18, number 6 (July 2024), article 157.
@article{Cui:2024:SSM,
  author = {Guosheng Cui and Ruxin Wang and Dan Wu and Ye Li},
  title = {Semi-supervised Multi-view Clustering based on {NMF} with
    Fusion Regularization},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {157:1--157:??},
  articleno = {157},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3653022},
  url = {https://dl.acm.org/doi/10.1145/3653022},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Multi-view clustering has attracted significant attention
    and application. Nonnegative matrix factorization is one popular
    feature of learning technology in pattern recognition. In recent
    years, many semi-supervised nonnegative matrix factorization
    \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 158.
@article{Han:2024:DHH,
  author = {Jiadi Han and Yufei Tang and Qian Tao and Yuhan Xia and Liming
    Zhang},
  title = {Dual Homogeneity Hypergraph Motifs with Cross-view Contrastive
    Learning for Multiple Social Recommendations},
  journal = j-TKDD,
  volume = {18},
  number = {6},
  pages = {158:1--158:??},
  articleno = {158},
  month = jul,
  year = {2024},
  doi = {https://doi.org/10.1145/3653976},
  url = {https://dl.acm.org/doi/10.1145/3653976},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Social relations are often used as auxiliary information to
    address data sparsity and cold-start issues in social
    recommendations. In the real world, social relations among users are
    complex and diverse. Widely used graph neural networks (GNNs) can
    only \ldots{}},
  bibdate = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}
%%% TKDD volume 18, number 6 (July 2024), article 159.
%%% NOTE(review): the second author is entered as "HaiHong E" with no
%%% period, on the understanding that "E" is the complete family name and
%%% not an abbreviated initial --- confirm against the publisher's author
%%% list.
@Article{Zhang:2024:FFF,
author = "Wentai Zhang and HaiHong E and Haoran Luo and Mingzhi
Sun",
title = "{FulBM}: Fast Fully Batch Maintenance for
Landmark-based $3$-hop Cover Labeling",
journal = j-TKDD,
volume = "18",
number = "6",
pages = "159:1--159:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3650035",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Tue Apr 30 06:43:01 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3650035",
abstract = "Landmark-based 3-hop cover labeling is a category of
approaches for shortest distance/path queries on
large-scale complex networks. It pre-computes an index
offline to accelerate the online distance/path query.
Most real-world graphs undergo rapid changes \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "159",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Yang:2024:HPL,
  author          = {Jingfeng Yang and Hongye Jin and Ruixiang Tang and
                     Xiaotian Han and Qizhang Feng and Haoming Jiang and
                     Shaochen Zhong and Bing Yin and Xia Hu},
  title           = {Harnessing the Power of {LLMs} in Practice: a Survey
                     on {ChatGPT} and Beyond},
  journal         = j-TKDD,
  volume          = {18},
  number          = {6},
  pages           = {160:1--160:??},
  month           = jul,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3649506},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Apr 30 06:43:01 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3649506},
  abstract        = {This article presents a comprehensive and practical
                     guide for practitioners and end-users working with
                     Large Language Models (LLMs) in their downstream
                     Natural Language Processing (NLP) tasks. We provide
                     discussions and insights into the usage of LLMs
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {160},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Cui:2024:ISP,
  author          = {Jingyi Cui and Guangquan Xu and Jian Liu and Shicheng
                     Feng and Jianli Wang and Hao Peng and Shihui Fu and
                     Zhaohua Zheng and Xi Zheng and Shaoying Liu},
  title           = {{ID-SR}: Privacy-Preserving Social Recommendation
                     Based on Infinite Divisibility for Trustworthy {AI}},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {161:1--161:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3639412},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3639412},
  abstract        = {Recommendation systems powered by artificial
                     intelligence (AI) are widely used to improve user
                     experience. However, AI inevitably raises privacy
                     leakage and other security issues due to the
                     utilization of extensive user data. Addressing these
                     challenges \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {161},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Yang:2024:MGC,
  author          = {Xihong Yang and Yiqi Wang and Yue Liu and Yi Wen and
                     Lingyuan Meng and Sihang Zhou and Xinwang Liu and En
                     Zhu},
  title           = {Mixed Graph Contrastive Network for Semi-supervised
                     Node Classification},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {162:1--162:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3641549},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3641549},
  abstract        = {Graph Neural Networks (GNNs) have achieved promising
                     performance in semi-supervised node classification in
                     recent years. However, the problem of insufficient
                     supervision, together with representation collapse,
                     largely limits the performance of the GNNs in
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {162},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Ling:2024:FFS,
  author          = {Zhaolong Ling and Enqi Xu and Peng Zhou and Liang Du
                     and Kui Yu and Xindong Wu},
  title           = {Fair Feature Selection: a Causal Perspective},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {163:1--163:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3643890},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3643890},
  abstract        = {Fair feature selection for classification decision
                     tasks has recently garnered significant attention from
                     researchers. However, existing fair feature selection
                     algorithms fall short of providing a full explanation
                     of the causal relationship between \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {163},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@Article{Kose:2024:FFA,
author = "O. Deniz Kose and Yanning Shen",
title = "{FairGAT}: Fairness-Aware Graph Attention Networks",
journal = j-TKDD,
volume = "18",
number = "7",
pages = "164:1--164:??",
month = aug,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3645096",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 22 11:37:44 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3645096",
abstract = "Graphs can facilitate modeling various complex systems
such as gene networks and power grids as well as
analyzing the underlying relations within them.
Learning over graphs has recently attracted increasing
attention, particularly graph neural network
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "164",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Liu:2024:DSA,
  author          = {Shenghao Liu and Yu Zhang and Lingzhi Yi and Xianjun
                     Deng and Laurence T. Yang and Bang Wang},
  title           = {Dual-Side Adversarial Learning Based Fair
                     Recommendation for Sensitive Attribute Filtering},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {165:1--165:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3648683},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3648683},
  abstract        = {With the development of recommendation algorithms,
                     researchers are paying increasing attention to fairness
                     issues such as user discrimination in recommendations.
                     To address these issues, existing works often filter
                     users' sensitive information that may \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {165},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Ye:2024:BYC,
  author          = {Tiandi Ye and Cen Chen and Yinggui Wang and Xiang Li
                     and Ming Gao},
  title           = {{BapFL}: You can Backdoor Personalized Federated
                     Learning},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {166:1--166:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3649316},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3649316},
  abstract        = {In federated learning (FL), malicious clients could
                     manipulate the predictions of the trained model through
                     backdoor attacks, posing a significant threat to the
                     security of FL systems. Existing research primarily
                     focuses on backdoor attacks and defenses \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {166},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Li:2024:VWM,
  author          = {Feifei Li and Yuanbin Wang and Oya Beyan and Mirjam
                     Sch{\"o}neck and Liliana Lourenco Caldeira},
  title           = {Voxel-Wise Medical Image Generalization for
                     Eliminating Distribution Shift},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {167:1--167:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3643034},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3643034},
  abstract        = {Currently, the medical field is witnessing an increase
                     in the use of machine learning techniques. Supervised
                     learning methods adopted in classification, prediction,
                     and segmentation tasks for medical images always
                     experience decreased performance when the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {167},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Wen:2024:AIT,
  author          = {Cheng Wen and Yuandao Cai and Bin Zhang and Jie Su and
                     Zhiwu Xu and Dugang Liu and Shengchao Qin and Zhong
                     Ming and Tian Cong},
  title           = {Automatically Inspecting Thousands of Static Bug
                     Warnings with Large Language Model: How Far Are We?},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {168:1--168:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3653718},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3653718},
  abstract        = {Static analysis tools for capturing bugs and
                     vulnerabilities in software programs are widely
                     employed in practice, as they have the unique
                     advantages of high coverage and independence from the
                     execution environment. However, existing tools for
                     analyzing \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {168},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Wu:2024:ASM,
  author          = {Chenwang Wu and Defu Lian and Yong Ge and Min Zhou and
                     Enhong Chen},
  title           = {Attacking Social Media via Behavior Poisoning},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {169:1--169:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3654673},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3654673},
  abstract        = {Since social media such as Facebook and X (formerly
                     known as Twitter) have permeated various aspects of
                     daily life, people have strong incentives to influence
                     information dissemination on these platforms and
                     differentiate their content from the fierce \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {169},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Brzezinski:2024:PFM,
  author          = {Dariusz Brzezinski and Julia Stachowiak and Jerzy
                     Stefanowski and Izabela Szczech and Robert Susmaga and
                     Sofya Aksenyuk and Uladzimir Ivashka and Oleksandr
                     Yasinskyi},
  title           = {Properties of Fairness Measures in the Context of
                     Varying Class Imbalance and Protected Group Ratios},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {170:1--170:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3654659},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3654659},
  abstract        = {Society is increasingly relying on predictive models
                     in fields like criminal justice, credit risk
                     management, and hiring. To prevent such automated
                     systems from discriminating against people belonging to
                     certain groups, fairness measures have become a
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {170},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Chen:2024:TRT,
  author          = {Yunkai Chen and Qimeng Wang and Shiwei Wu and Yan Gao
                     and Tong Xu and Yao Hu},
  title           = {{TOMGPT}: Reliable Text-Only Training Approach for
                     Cost-Effective Multi-modal Large Language Model},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {171:1--171:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3654674},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3654674},
  abstract        = {Multi-modal large language models (MLLMs), such as
                     GPT-4, exhibit great comprehension capabilities on
                     human instruction, as well as zero-shot ability on new
                     downstream multi-modal tasks. To integrate the
                     different modalities within a unified embedding
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {171},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Zhang:2024:FTT,
  author          = {Jiaxin Zhang and Yiqi Wang and Xihong Yang and En
                     Zhu},
  title           = {A Fully Test-time Training Framework for
                     Semi-supervised Node Classification on
                     Out-of-Distribution Graphs},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {172:1--172:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3649507},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3649507},
  abstract        = {Graph neural networks (GNNs) have shown great
                     potential in representation learning for various graph
                     tasks. However, the distribution shift between the
                     training and test sets poses a challenge to the
                     efficiency of GNNs. To address this challenge, HomoTTT
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {172},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Zhu:2024:STR,
  author          = {Ronghang Zhu and Dongliang Guo and Daiqing Qi and
                     Zhixuan Chu and Xiang Yu and Sheng Li},
  title           = {A Survey of Trustworthy Representation Learning Across
                     Domains},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {173:1--173:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3657301},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3657301},
  abstract        = {As AI systems have obtained significant performance to
                     be deployed widely in our daily lives and human
                     society, people both enjoy the benefits brought by
                     these technologies and suffer many social issues
                     induced by these systems. To make AI systems good
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {173},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Li:2024:SAS,
  author          = {Mengyao Li and Zhiyong Li and Zhibang Yang and Xu Zhou
                     and Yifan Li and Ziyan Wu and Lingzhao Kong and Ke
                     Nai},
  title           = {{SA2E-AD}: a Stacked Attention Autoencoder for Anomaly
                     Detection in Multivariate Time Series},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {174:1--174:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3653677},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3653677},
  abstract        = {Anomaly detection for multivariate time series is an
                     essential task in the modern industrial field. Although
                     several methods have been developed for anomaly
                     detection, they usually fail to effectively exploit the
                     metrical-temporal correlation and the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {174},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Zhuang:2024:TRR,
  author          = {Wen-Ming Zhuang and Chih-Yao Chen and Cheng-Te Li},
  title           = {Towards Robust Rumor Detection with Graph Contrastive
                     and Curriculum Learning},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {175:1--175:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3653023},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3653023},
  abstract        = {Establishing a robust rumor detection model is vital
                     in safeguarding the veracity of information on social
                     media platforms. However, existing approaches to
                     stopping rumor from spreading rely on abundant and
                     clean training data, which is rarely available
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {175},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@Article{Zhang:2024:TFL,
author = "Lei Zhang and Lele Fu and Chen Liu and Zhao Yang and
Jinghua Yang and Zibin Zheng and Chuan Chen",
title = "Toward Few-Label Vertical Federated Learning",
journal = j-TKDD,
volume = "18",
number = "7",
pages = "176:1--176:??",
month = aug,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3656344",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 22 11:37:44 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3656344",
abstract = "Federated Learning (FL) provides a novel paradigm for
privacy-preserving machine learning, enabling multiple
clients to collaborate on model training without
sharing private data. To handle multi-source
heterogeneous data, Vertical Federated Learning (VFL)
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "176",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Liu:2024:LIG,
  author          = {Xinru Liu and Yongjing Hao and Lei Zhao and Guanfeng
                     Liu and Victor S. Sheng and Pengpeng Zhao},
  title           = {{LMACL}: Improving Graph Collaborative Filtering with
                     Learnable Model Augmentation Contrastive Learning},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {177:1--177:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3657302},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3657302},
  abstract        = {Graph collaborative filtering (GCF) has achieved
                     exciting recommendation performance with its ability to
                     aggregate high-order graph structure information.
                     Recently, contrastive learning (CL) has been
                     incorporated into GCF to alleviate data sparsity and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {177},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Schlieper:2024:EUO,
  author          = {Philipp Schlieper and Hermann Luft and Kai Klede and
                     Christoph Strohmeyer and Bjoern Eskofier and Dario
                     Zanca},
  title           = {Enhancing Unsupervised Outlier Model Selection: a
                     Study on {IREOS} Algorithms},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {178:1--178:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3653719},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3653719},
  abstract        = {Outlier detection stands as a critical cornerstone in
                     the field of data mining, with a wide range of
                     applications spanning from fraud detection to network
                     security. However, real-world scenarios often lack
                     labeled data for training, necessitating \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {178},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@Article{Sui:2024:CAS,
author = "Hongjie Sui and Huan Yan and Tianyi Zheng and Wenzhen
Huang and Yunlin Zhuang and Yong Li",
title = "Congestion-aware Spatio-Temporal Graph Convolutional
Network-based {$ A^* $} Search Algorithm for Fastest
Route Search",
journal = j-TKDD,
volume = "18",
number = "7",
pages = "179:1--179:??",
month = aug,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3657640",
ISSN = "1556-4681 (print), 1556-472X (electronic)",
ISSN-L = "1556-4681",
bibdate = "Sat Jun 22 11:37:44 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
URL = "https://dl.acm.org/doi/10.1145/3657640",
abstract = "The fastest route search, which is to find a path with
the shortest travel time when the user initiates a
query, has become one of the most important services in
many map applications. To enhance the user experience
of travel, it is necessary to achieve \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Knowl. Discov. Data",
articleno = "179",
fjournal = "ACM Transactions on Knowledge Discovery from Data
(TKDD)",
journal-URL = "https://dl.acm.org/loi/tkdd",
}
@article{Bicego:2024:CRF,
  author          = {Manuele Bicego and Ferdinando Cicalese},
  title           = {Computing Random Forest-distances in the presence of
                     missing data},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {180:1--180:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3656345},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3656345},
  abstract        = {In this article, we study the problem of computing
                     Random Forest-distances in the presence of missing
                     data. We present a general framework which avoids
                     pre-imputation and uses in an agnostic way the
                     information contained in the input points. We centre
                     our \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {180},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Deng:2024:DPL,
  author          = {Jiayi Deng and Danyang Huang and Bo Zhang},
  title           = {Distributed Pseudo-Likelihood Method for Community
                     Detection in Large-Scale Networks},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {181:1--181:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3657300},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3657300},
  abstract        = {This paper proposes a distributed pseudo-likelihood
                     method (DPL) to conveniently identify the community
                     structure of large-scale networks. Specifically, we
                     first propose a block-wise splitting method to divide
                     large-scale network data into several \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {181},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Xia:2024:FEF,
  author          = {Bolun (Namir) Xia and Vipula Rawte and Aparna Gupta
                     and Mohammed Zaki},
  title           = {{FETILDA}: Evaluation Framework for Effective
                     Representations of Long Financial Documents},
  journal         = j-TKDD,
  volume          = {18},
  number          = {7},
  pages           = {182:1--182:??},
  month           = aug,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3657299},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 22 11:37:44 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3657299},
  abstract        = {In the financial sphere, there is a wealth of
                     accumulated unstructured financial data, such as the
                     textual disclosure documents that companies submit on a
                     regular basis to regulatory agencies, such as the
                     Securities and Exchange Commission. These \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {182},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Yu:2024:IGC,
  author          = {Penghang Yu and Bing-Kun Bao and Zhiyi Tan and
                     Guanming Lu},
  title           = {Improving Graph Collaborative Filtering with
                     Directional Behavior Enhanced Contrastive Learning},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {183:1--183:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3663574},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3663574},
  abstract        = {Graph Collaborative Filtering is a widely adopted
                     approach for recommendation, which captures similar
                     behavior features through Graph Neural Network (GNN).
                     Recently, Contrastive Learning (CL) has been
                     demonstrated as an effective method to enhance the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {183},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Lu:2024:FNS,
  author          = {Fengcheng Lu and Michael Kwok-Po Ng},
  title           = {{FastHGNN}: a New Sampling Technique for Learning with
                     Hypergraph Neural Networks},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {184:1--184:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3663670},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3663670},
  abstract        = {Hypergraphs can represent higher-order relations among
                     objects. Traditional hypergraph neural networks involve
                     node-edge-node transform, leading to high computational
                     cost and timing. The main aim of this article is to
                     propose a new sampling technique for \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {184},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Huang:2024:EEA,
  author          = {Yinqiu Huang and Min Gao and Kai Shu and Chenghua Lin
                     and Jia Wang and Wei Zhou},
  title           = {{EML}: Emotion-Aware Meta Learning for Cross-Event
                     False Information Detection},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {185:1--185:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3661485},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3661485},
  abstract        = {Modern social media's development has dramatically
                     changed how people obtain information. However, the
                     wide dissemination of various false information has
                     severe detrimental effects. Accordingly, many deep
                     learning-based methods have been proposed to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {185},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Qian:2024:LAL,
  author          = {Yu-Yang Qian and Zhen-Yu Zhang and Peng Zhao and
                     Zhi-Hua Zhou},
  title           = {Learning with Asynchronous Labels},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {186:1--186:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3662186},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3662186},
  abstract        = {Learning with data streams has attracted much
                     attention in recent decades. Conventional approaches
                     typically assume that the feature and label of a data
                     item can be timely observed at each round. In many
                     real-world tasks, however, it often occurs that
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {186},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{He:2024:VAD,
  author          = {Yifan He and Yatao Bian and Xi Ding and Bingzhe Wu and
                     Jihong Guan and Ji Zhang and Shuigeng Zhou},
  title           = {Variate Associated Domain Adaptation for Unsupervised
                     Multivariate Time Series Anomaly Detection},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {187:1--187:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3663573},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3663573},
  abstract        = {Multivariate Time Series Anomaly Detection (MTS-AD) is
                     crucial for the effective management and maintenance of
                     devices in complex systems, such as server clusters,
                     spacecrafts, and financial systems, and so on. However,
                     upgrade or cross-platform \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {187},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Liang:2024:DCG,
  author          = {Tianhai Liang and Qiangqiang Shen and Shuqin Wang and
                     Yongyong Chen and Guokai Zhang and Junxin Chen},
  title           = {Data Completion-Guided Unified Graph Learning for
                     Incomplete Multi-View Clustering},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {188:1--188:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3664290},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3664290},
  abstract        = {Due to its heterogeneous property, multi-view data has
                     been widely concerned over single-view data for
                     performance improvement. Unfortunately, some instances
                     may be with partially available information because of
                     some uncontrollable factors, for which the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {188},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@article{Li:2024:MUT,
  author          = {Tong Li and Shuodi Hui and Shiyuan Zhang and Huandong
                     Wang and Yuheng Zhang and Pan Hui and Depeng Jin and
                     Yong Li},
  title           = {Mobile User Traffic Generation Via Multi-Scale
                     Hierarchical {GAN}},
  journal         = j-TKDD,
  volume          = {18},
  number          = {8},
  pages           = {189:1--189:??},
  month           = sep,
  year            = {2024},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3664655},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Aug 28 06:29:41 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3664655},
  abstract        = {Mobile user traffic facilitates diverse applications,
                     including network planning and optimization, whereas
                     large-scale mobile user traffic is hardly available due
                     to privacy concerns. One alternative solution is to
                     generate mobile user traffic data for \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {189},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}
@Article{Nicola:2024:IRM,
  author =       "Victor {Gomes De Oliveira Martins Nicola} and Karina
                 {Valdivia Delgado} and Marcelo de Souza Lauretto",
  title =        "Imbalance-Robust Multi-Label Self-Adjusting {kNN}",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "190:1--190:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3663575",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3663575",
  abstract =     "In the task of multi-label classification in data
                 streams, instances arriving in real-time need to be
                 associated with multiple labels simultaneously. Various
                 methods based on the k Nearest Neighbors algorithm have
                 been proposed to address this task. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "190",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:MOR,
  author =       "Xiangyu Li and Hua Wang",
  title =        "On Mean-Optimal Robust Linear Discriminant Analysis",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "191:1--191:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3665500",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3665500",
  abstract =     "Linear discriminant analysis (LDA) is widely used for
                 dimensionality reduction under supervised learning
                 settings. Traditional LDA objective aims to minimize
                 the ratio of the squared Euclidean distances that may
                 not perform optimally on noisy datasets. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "191",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ekle:2024:ADD,
  author =       "Ocheme Anthony Ekle and William Eberle",
  title =        "Anomaly Detection in Dynamic Graphs: a Comprehensive
                 Survey",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "192:1--192:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3669906",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3669906",
  abstract =     "This survey article presents a comprehensive and
                 conceptual overview of anomaly detection (AD) using
                 dynamic graphs. We focus on existing graph-based AD
                 techniques and their applications to dynamic networks.
                 The contributions of this survey article \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "192",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Xi:2024:UOR,
  author =       "Yunjia Xi and Weiwen Liu and Xinyi Dai and Ruiming
                 Tang and Qing Liu and Weinan Zhang and Yong Yu",
  title =        "Utility-Oriented Reranking with Counterfactual
                 Context",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "193:1--193:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3671004",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3671004",
  abstract =     "As a critical task for large-scale commercial
                 recommender systems, reranking rearranges items in the
                 initial ranking lists from the previous ranking stage
                 to better meet users' demands. Foundational work in
                 reranking has shown the potential of improving
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "193",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yin:2024:CVK,
  author =       "Jiao Yin and Wei Hong and Hua Wang and Jinli Cao and
                 Yuan Miao and Yanchun Zhang",
  title =        "A Compact Vulnerability Knowledge Graph for Risk
                 Assessment",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "194:1--194:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3671005",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3671005",
  abstract =     "Software vulnerabilities, also known as flaws, bugs or
                 weaknesses, are common in modern information systems,
                 putting critical data of organizations and individuals
                 at cyber risk. Due to the scarcity of resources,
                 initial risk assessment is becoming a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "194",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Yang:2024:DPD,
  author =       "Lingkai Yang and Sally McClean and Mark Donnelly and
                 Kashaf Khan and Kevin Burke",
  title =        "Detecting Process Duration Drift Using Gamma Mixture
                 Models in a Left-Truncated and Right-Censored
                 Environment",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "195:1--195:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3669942",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3669942",
  abstract =     "Within the realm of business context, process duration
                 signifies time spent by customers between successive
                 activities. This temporal perspective offers important
                 insight to customer behavior, highlighting potential
                 bottlenecks, and influencing business \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "195",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhang:2024:ADA,
  author =       "Yang Zhang and Ting Yu and Shengqiang Chi and Zhen
                 Wang and Yue Gao and Ji Zhang and Tianshu Zhou",
  title =        "Attribute Diversity Aware Community Detection on
                 Attributed Graphs Using Three-View Graph Attention
                 Neural Networks",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "196:1--196:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3672081",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3672081",
  abstract =     "Community detection is a fundamental yet important
                 task for characterizing and understanding the structure
                 of attributed graphs. Existing methods mainly focus on
                 the structural tightness and attribute similarity among
                 nodes in a community. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "196",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:HMP,
  author =       "Munan Li and Kai Liu and Hongbo Liu and Zheng Zhao and
                 Tomas E. Ward and Xindong Wu",
  title =        "Heterogeneous Meta-Path Graph Learning for
                 Higher-Order Social Recommendation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "197:1--197:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3673658",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3673658",
  abstract =     "Recommendation systems have become an indispensable
                 part of daily life. Social recommendation systems,
                 which utilize social relationships and past behaviors
                 to infer users' preferences, have gained popularity in
                 recent years. Exploring the inherent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "197",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liang:2024:DUP,
  author =       "Yuliang Liang and Enneng Yang and Guibing Guo and Wei
                 Cai and Linying Jiang and Xingwei Wang",
  title =        "Deconfounding User Preference in Recommendation
                 Systems through Implicit and Explicit Feedback",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "198:1--198:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3673762",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3673762",
  abstract =     "Recommender systems are influenced by many confounding
                 factors (i.e., confounders) which result in various
                 biases (e.g., popularity biases) and inaccurate user
                 preference. Existing approaches try to eliminate these
                 biases by inference with causal graphs. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "198",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhao:2024:LIT,
  author =       "Ziyu Zhao and Yuqi Bai and Ruoxuan Xiong and Qingyu
                 Cao and Chao Ma and Ning Jiang and Fei Wu and Kun
                 Kuang",
  title =        "Learning Individual Treatment Effects under
                 Heterogeneous Interference in Networks",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "199:1--199:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3673761",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3673761",
  abstract =     "Estimating individual treatment effects in networked
                 observational data is a crucial and increasingly
                 recognized problem. One major challenge of this problem
                 is violating the stable unit treatment value assumption
                 (SUTVA), which posits that a unit's \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "199",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ma:2024:SPS,
  author =       "Fei Ma and Ping Wang",
  title =        "Structural Properties on Scale-Free Tree Network with
                 an Ultra-Large Diameter",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "200:1--200:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3674146",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3674146",
  abstract =     "Scale-free networks are prevalently observed in a
                 great variety of complex systems, which triggers
                 various researches relevant to networked models of such
                 type. In this work, we propose a family of growth tree
                 networks \(\mathcal{T}_{t}\), which turn \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "200",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:QNN,
  author =       "Jiaye Li and Jinjing Shi and Jian Zhang and Yuhu Lu
                 and Qin Li and Chunlin Yu and Shichao Zhang",
  title =        "Quantum Nearest Neighbor Collaborative Filtering
                 Algorithm for Recommendation System",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "201:1--201:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3674982",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3674982",
  abstract =     "Recommendation has become especially crucial during
                 the COVID-19 pandemic as a significant number of people
                 rely on online shopping from home. Existing
                 recommendation algorithms, designed to address issues
                 like cold start and data sparsity, often overlook
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "201",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Ding:2024:TFD,
  author =       "Shifei Ding and Benyu Wu and Ling Ding and Xiao Xu and
                 Lili Guo and Hongmei Liao and Xindong Wu",
  title =        "Towards Faster Deep Graph Clustering via Efficient
                 Graph Auto-Encoder",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "202:1--202:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3674983",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3674983",
  abstract =     "Deep graph clustering (DGC) has been a promising
                 method for clustering graph data in recent years.
                 However, existing research primarily focuses on
                 optimizing clustering outcomes by improving the quality
                 of embedded representations, resulting in slow-speed
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "202",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Sun:2024:TDA,
  author =       "Mingchen Sun and Yingji Li and Ying Wang and Xin
                 Wang",
  title =        "Towards Domain-Aware Stable Meta Learning for
                 Out-of-Distribution Generalization",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "203:1--203:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3676558",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3676558",
  abstract =     "Deep learning models are often trained on datasets
                 that are limited in size and distribution, which may
                 not fully represent the entire range of data
                 encountered in practice. Thus, making deep learning
                 models generalize to out-of-distribution data has
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "203",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wei:2024:NER,
  author =       "Xiangyu Wei and Wei Wang and Chongsheng Zhang and
                 Weiping Ding and Bin Wang and Yaguan Qian and Zhen Han
                 and Chunhua Su",
  title =        "Neighbor-Enhanced Representation Learning for Link
                 Prediction in Dynamic Heterogeneous Attributed
                 Networks",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "204:1--204:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3676559",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3676559",
  abstract =     "Dynamic link prediction aims to predict future
                 connections among unconnected nodes in a network. It
                 can be applied for friend recommendations, link
                 completion, and other tasks. Network representation
                 learning algorithms have demonstrated considerable
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "204",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Li:2024:MMI,
  author =       "Hongyu Li and Lefei Zhang and Kehua Su and Wei Yu",
  title =        "{MICCF}: a Mutual Information Constrained Clustering
                 Framework for Learning Clustering-Oriented Feature
                 Representations",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "205:1--205:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3672402",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3672402",
  abstract =     "Deep clustering is a crucial task in machine learning
                 and data mining that focuses on acquiring feature
                 representations conducive to clustering. Previous
                 research relies on self-supervised representation
                 learning for general feature representations, such
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "205",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Liao:2024:APT,
  author =       "Chengwu Liao and Chao Chen and Wanyi Zhang and Suiming
                 Guo and Chao Liu",
  title =        "{AGENDA}: Predicting Trip Purposes with A New Graph
                 Embedding Network and Active Domain Adaptation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "206:1--206:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3677020",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3677020",
  abstract =     "Trip purpose is a meaningful aspect of travel
                 behaviour for the understanding of urban mobility.
                 However, it is non-trivial to automatically obtain trip
                 purposes. On one hand, trip purposes are naturally
                 diverse and complicated, but the available \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "206",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Peng:2024:MSS,
  author =       "Bo Peng and Ziqi Chen and Srinivasan Parthasarathy and
                 Xia Ning",
  title =        "Modeling Sequences as Star Graphs to Address
                 Over-Smoothing in Self-Attentive Sequential
                 Recommendation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "207:1--207:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3676560",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3676560",
  abstract =     "Self-attention (SA) mechanisms have been widely used
                 in developing sequential recommendation (SR) methods,
                 and demonstrated state-of-the-art performance. However,
                 in this article, we show that self-attentive SR methods
                 substantially suffer from the over-smoothing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "207",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Wang:2024:FFL,
  author =       "Chunnan Wang and Xiangyu Shi and Hongzhi Wang",
  title =        "Fair Federated Learning with Multi-Objective
                 Hyperparameter Optimization",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "208:1--208:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3676968",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3676968",
  abstract =     "Federated learning (FL) is an attractive paradigm for
                 privacy-aware distributed machine learning, which
                 enables clients to collaboratively learn a global model
                 without sharing clients' data. Recently, many
                 strategies have been proposed to improve the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "208",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Zhou:2024:CED,
  author =       "Peng Zhou and Yufeng Guo and Haoran Yu and Yuanting
                 Yan and Yanping Zhang and Xindong Wu",
  title =        "Concept Evolution Detecting over Feature Streams",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "209:1--209:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3678012",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3678012",
  abstract =     "The explosion of data volume has gradually transformed
                 big data processing from the static batch mode to the
                 online streaming model. Streaming data can be divided
                 into instance streams (feature space remains fixed
                 while instances increase over time), \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "209",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}
@Article{Meng:2024:SIB,
  author =       "Siyuan Meng and Jie Zhou and Xuxin Chen and Yufei Liu
                 and Fengyuan Lu and Xinli Huang",
  title =        "Structure-Information-Based Reasoning over the
                 Knowledge Graph: a Survey of Methods and Applications",
  journal =      j-TKDD,
  volume =       "18",
  number =       "8",
  pages =        "210:1--210:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3671148",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Aug 28 06:29:41 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3671148",
  abstract =     "The knowledge graph (KG) is an efficient form of
                 knowledge organization and expression, providing prior
                 knowledge support for various downstream tasks, and has
                 received extensive attention in natural language
                 processing. However, existing large-scale KGs
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "210",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}