%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.20",
%%%     date            = "02 July 2014",
%%%     time            = "18:22:41 MDT",
%%%     filename        = "tweb.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "46901 6830 38293 357489",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "ACM Transactions on the Web (TWEB);
%%%                        bibliography; TWEB",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on the Web (TWEB) (CODEN
%%%                        ????, ISSN 1559-1131), covering all journal
%%%                        issues from 2007 -- date.
%%%
%%%                        At version 1.20, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2007 (  14)    2010 (  17)    2013 (  30)
%%%                             2008 (  22)    2011 (  21)    2014 (  13)
%%%                             2009 (  14)    2012 (  18)
%%%
%%%                             Article:        149
%%%
%%%                             Total entries:  149
%%%
%%%                        The journal Web page can be found at:
%%%
%%%                            http://www.acm.org/pubs/tweb.html
%%%
%%%                        The journal table of contents page is at:
%%%
%%%                            http://www.acm.org/tweb/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J1062
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project; a schematic sketch of the
%%%                        scheme follows this header.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility; a sketch of
%%%                        the wc-style counts follows this header."
%%%     }
%%% ====================================================================
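%%% Illustration of the citation tag scheme:
%%%
%%% The name:year:abbrev tags described in the header were generated
%%% by BibNet Project software that is not reproduced here.  The
%%% Python sketch below is only a hypothetical reconstruction of the
%%% scheme (the names STOPWORDS and citation_tag are invented for
%%% illustration); it ignores accent stripping, tag-collision
%%% handling, and other details of the real tool:
%%%
%%%     # Hypothetical sketch, not the BibNet Project software.
%%%     STOPWORDS = {"a", "an", "and", "for", "in", "of", "on",
%%%                  "the", "to", "with"}
%%%
%%%     def citation_tag(family_name, year, title):
%%%         # Keep the uppercased initials of up to three
%%%         # significant (non-stopword) title words.
%%%         words = [w for w in title.split()
%%%                  if w.lower() not in STOPWORDS]
%%%         abbrev = "".join(w[0].upper() for w in words[:3])
%%%         return "%s:%d:%s" % (family_name, year, abbrev)
%%%
%%%     # citation_tag("Leskovec", 2007,
%%%     #              "The dynamics of viral marketing")
%%%     # == "Leskovec:2007:DVM"  (cf. the entry below)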
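%%%
%%% Illustration of the checksum field:
%%%
%%% The last three values in the checksum field above are the output
%%% of the standard UNIX wc utility (lines, words, characters); the
%%% first value is a CRC-16 from Robert Solovay's checksum utility,
%%% which is not sketched here.  A minimal Python sketch of the
%%% wc-style counts, assuming the file is read as raw bytes the way
%%% wc reads it (wc_counts is an invented name):
%%%
%%%     def wc_counts(path):
%%%         # Count newlines, whitespace-separated words, and bytes,
%%%         # as "wc FILE" reports them.
%%%         with open(path, "rb") as f:
%%%             data = f.read()
%%%         return data.count(b"\n"), len(data.split()), len(data)
%%%
%%%     # For version 1.20 of this file, wc_counts("tweb.bib")
%%%     # should agree with (6830, 38293, 357489).
%%% ====================================================================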

@Preamble{"\input bibnames.sty" #
    "\def \TM {${}^{\sc TM}$}"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:

@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

@String{j-TWEB                  = "ACM Transactions on the Web (TWEB)"}

%%% ====================================================================
%%% Bibliography entries:

@Article{Ashman:2007:I,
  author =       "Helen Ashman and Arun Iyengar",
  title =        "Introduction",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232723",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Urgaonkar:2007:AMM,
  author =       "Bhuvan Urgaonkar and Giovanni Pacifici and Prashant
                 Shenoy and Mike Spreitzer and Asser Tantawi",
  title =        "Analytic modeling of multitier {Internet}
                 applications",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232724",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Since many Internet applications employ a multitier
                 architecture, in this article, we focus on the problem
                 of analytically modeling the behavior of such
                 applications. We present a model based on a network of
                 queues where the queues represent different tiers of
                 the application. Our model is sufficiently general to
                 capture (i) the behavior of tiers with significantly
                 different performance characteristics and (ii)
                 application idiosyncrasies such as session-based
                 workloads, tier replication, load imbalances across
                 replicas, and caching at intermediate tiers. We
                 validate our model using real multitier applications
                 running on a Linux server cluster. Our experiments
                 indicate that our model faithfully captures the
                 performance of these applications for a number of
                 workloads and configurations. Furthermore, our model
                 successfully handles a comprehensive range of resource
                 utilization---from 0 to near saturation for the
                 CPU---for two separate tiers. For a variety of
                 scenarios, including those with caching at one of the
                 application tiers, the average response times predicted
                 by our model were within the 95\% confidence intervals
                 of the observed average response times. Our experiments
                 also demonstrate the utility of the model for dynamic
                 capacity provisioning, performance prediction,
                 bottleneck identification, and session policing. In one
                 scenario, where the request arrival rate increased from
                 less than 1500 to nearly 4200 requests/minute, a
                 dynamic provisioning technique employing our model was
                 able to maintain response time targets by increasing
                 the capacity of two of the tiers by factors of 2 and
                 3.5, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "analytical model; dynamic provisioning; hosting
                 platform; Internet service; mean-value analysis;
                 performance prediction; policing; queuing theory;
                 session; tier",
}

@Article{Jansen:2007:CES,
  author =       "Bernard J. Jansen",
  title =        "The comparative effectiveness of sponsored and
                 nonsponsored links for {Web} e-commerce queries",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232725",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The predominant business model for Web search engines
                 is sponsored search, which generates billions in yearly
                 revenue. But are sponsored links providing online
                 consumers with relevant choices for products and
                 services? We address this and related issues by
                 investigating the relevance of sponsored and
                 nonsponsored links for e-commerce queries on the major
                 search engines. The results show that average relevance
                 ratings for sponsored and nonsponsored links are
                 practically the same, although the relevance ratings
                 for sponsored links are statistically higher. We used
                 108 e-commerce queries and 8,256 retrieved links for
                 these queries from three major Web search engines:
                 Yahoo!, Google, and MSN. In addition to relevance
                 measures, we qualitatively analyzed the e-commerce
                 queries, deriving five categorizations of underlying
                 information needs. Product-specific queries are the
                 most prevalent (48\%). Title (62\%) and summary (33\%)
                 are the primary basis for evaluating sponsored links
                 with URL a distant third (2\%). To gauge the
                 effectiveness of sponsored search campaigns, we
                 analyzed the sponsored links from various viewpoints.
                 It appears that links from organizations with large
                 sponsored search campaigns are more relevant than the
                 average sponsored link. We discuss the implications for
                 Web search engines and sponsored search as a long-term
                 business model and as a mechanism for finding relevant
                 information for searchers.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "e-commerce searching; sponsored links; sponsored
                 results; sponsored search; Web search engines; Web
                 searching",
}

@Article{Church:2007:MIA,
  author =       "Karen Church and Barry Smyth and Paul Cotter and Keith
                 Bradley",
  title =        "Mobile information access: {A} study of emerging
                 search behavior on the mobile {Internet}",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232726",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "It is likely that mobile phones will soon come to
                 rival more traditional devices as the primary platform
                 for information access. Consequently, it is important
                 to understand the emerging information access behavior
                 of mobile Internet (MI) users especially in relation to
                 their use of mobile handsets for information browsing
                 and query-based search. In this article, we describe
                 the results of a recent analysis of the MI habits of
                 more than 600,000 European MI users, with a particular
                 emphasis on the emerging interest in mobile search. We
                 consider a range of factors including whether there are
                 key differences between browsing and search behavior on
                 the MI compared to the Web. We highlight how browsing
                 continues to dominate mobile information access, but go
                 on to show how search is becoming an increasingly
                 popular information access alternative especially in
                 relation to certain types of mobile handsets and
                 information needs. Moreover, we show that sessions
                 involving search tend to be longer and more data-rich
                 than those that do not involve search. We also look at
                 the type of queries used during mobile search and the
                 way that these queries tend to be modified during the
                 course of a mobile search session. Finally we examine
                 the overlap among mobile search queries and the
                 different topics mobile users are interested in.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "log analysis; Mobile browsing; mobile Internet; mobile
                 search",
}

@Article{Leskovec:2007:DVM,
  author =       "Jure Leskovec and Lada A. Adamic and Bernardo A.
                 Huberman",
  title =        "The dynamics of viral marketing",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232727",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We present an analysis of a person-to-person
                 recommendation network, consisting of 4 million people
                 who made 16 million recommendations on half a million
                 products. We observe the propagation of recommendations
                 and the cascade sizes, which we explain by a simple
                 stochastic model. We analyze how user behavior varies
                 within user communities defined by a recommendation
                 network. Product purchases follow a `long tail' where a
                 significant share of purchases belongs to rarely sold
                 items. We establish how the recommendation network
                 grows over time and how effective it is from the
                 viewpoint of the sender and receiver of the
                 recommendations. While on average recommendations are
                 not very effective at inducing purchases and do not
                 spread very far, we present a model that successfully
                 identifies communities, product, and pricing categories
                 for which viral marketing seems to be very effective.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "e-commerce; long tail; network analysis; recommender
                 systems; Viral marketing; word-of-mouth",
}

@Article{Yu:2007:EAW,
  author =       "Tao Yu and Yue Zhang and Kwei-Jay Lin",
  title =        "Efficient algorithms for {Web} services selection with
                 end-to-end {QoS} constraints",
  journal =      j-TWEB,
  volume =       "1",
  number =       "1",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1232722.1232728",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:16:53 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Service-Oriented Architecture (SOA) provides a
                 flexible framework for service composition. Using
                 standard-based protocols (such as SOAP and WSDL),
                 composite services can be constructed by integrating
                 atomic services developed independently. Algorithms are
                 needed to select service components with various QoS
                 levels according to some application-dependent
                 performance requirements. We design a broker-based
                 architecture to facilitate the selection of QoS-based
                 services. The objective of service selection is to
                 maximize an application-specific utility function under
                 the end-to-end QoS constraints. The problem is modeled
                 in two ways: the combinatorial model and the graph
                 model. The combinatorial model defines the problem as a
                 multidimension multichoice 0-1 knapsack problem (MMKP).
                 The graph model defines the problem as a
                 multiconstraint optimal path (MCOP) problem. Efficient
                 heuristic algorithms for service processes of different
                 composition structures are presented in this article
                 and their performances are studied by simulations. We
                 also compare the pros and cons between the two
                 models.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "End-to-end QoS; service composition; service oriented
                 architecture (SOA); service selection; Web services",
}

@Article{Dubinko:2007:VTT,
  author =       "Micah Dubinko and Ravi Kumar and Joseph Magnani and
                 Jasmine Novak and Prabhakar Raghavan and Andrew
                 Tomkins",
  title =        "Visualizing tags over time",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255439",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We consider the problem of visualizing the evolution
                 of tags within the Flickr (flickr.com) online image
                 sharing community. Any user of the Flickr service may
                 append a tag to any photo in the system. Over the past
                 year, users have on average added over a million tags
                 each week. Understanding the evolution of these tags
                 over time is therefore a challenging task. We present a
                 new approach based on a characterization of the most
                 interesting tags associated with a sliding interval of
                 time. An animation provided via Flash in a Web browser
                 allows the user to observe and interact with the
                 interesting tags as they evolve over time.\par

                 New algorithms and data structures are required to
                 support the efficient generation of this visualization.
                 We combine a novel solution to an interval covering
                 problem with extensions to previous work on score
                 aggregation in order to create an efficient backend
                 system capable of producing visualizations at arbitrary
                 scales on this large dataset in real time.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Flickr; interval covering; tags; temporal evolution;
                 visualization",
}

@Article{Mohan:2007:SPC,
  author =       "Bharath Kumar Mohan and Benjamin J. Keller and Naren
                 Ramakrishnan",
  title =        "Scouts, promoters, and connectors: {The} roles of
                 ratings in nearest-neighbor collaborative filtering",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255440",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Recommender systems aggregate individual user ratings
                 into predictions of products or services that might
                 interest visitors. The quality of this aggregation
                 process crucially affects the user experience and hence
                 the effectiveness of recommenders in e-commerce. We
                 present a characterization of nearest-neighbor
                 collaborative filtering that allows us to disaggregate
                 global recommender performance measures into
                 contributions made by each individual rating. In
                 particular, we formulate three roles--- {\em scouts},
                 {\em promoters}, and {\em connectors\/} ---that capture
                 how users receive recommendations, how items get
                 recommended, and how ratings of these two types are
                 themselves connected, respectively. These roles find
                 direct uses in improving recommendations for users, in
                 better targeting of items and, most importantly, in
                 helping monitor the health of the system as a whole.
                 For instance, they can be used to track the evolution
                 of neighborhoods, to identify rating subspaces that do
                 not contribute (or contribute negatively) to system
                 performance, to enumerate users who are in danger of
                 leaving, and to assess the susceptibility of the system
                 to attacks such as shilling. We argue that the three
                 rating roles presented here provide broad primitives to
                 manage a recommender system and its community.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "collaborative filtering; connectors; neighborhoods;
                 promoters; Recommender systems; scouts; user-based and
                 item-based algorithms",
}

@Article{Rogers:2007:EPB,
  author =       "Alex Rogers and Esther David and Nicholas R. Jennings
                 and Jeremy Schiff",
  title =        "The effects of proxy bidding and minimum bid
                 increments within {eBay} auctions",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255441",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We present a mathematical model of the eBay auction
                 protocol and perform a detailed analysis of the effects
                 that the eBay proxy bidding system and the minimum bid
                 increment have on the auction properties. We first
                 consider the revenue of the auction, and we show
                 analytically that when two bidders with independent
                 private valuations use the eBay proxy bidding system
                 there exists an optimal value for the minimum bid
                 increment at which the auctioneer's revenue is
                 maximized. We then consider the sequential way in which
                 bids are placed within the auction, and we show
                 analytically that independent of assumptions regarding
                 the bidders' valuation distribution or bidding strategy
                 the number of visible bids placed is related to the
                 logarithm of the number of potential bidders. Thus, in
                 many cases, it is only a minority of the potential
                 bidders that are able to submit bids and are visible in
                 the auction bid history (despite the fact that the
                 other hidden bidders are still effectively competing
                 for the item). Furthermore, we show through simulation
                 that the minimum bid increment also introduces an
                 inefficiency to the auction, whereby a bidder who
                 enters the auction late may find that its valuation is
                 insufficient to allow it to advance the current bid
                 by the minimum bid increment, despite actually
                 having the highest valuation for the item. Finally, we
                 use these results to consider appropriate strategies
                 for bidders within real world eBay auctions. We show
                 that while last-minute bidding (sniping) is an
                 effective strategy against bidders engaging in
                 incremental bidding (and against those with common
                 values), in general, delaying bidding is
                 disadvantageous even if delayed bids are sure to be
                 received before the auction closes. Thus, when several
                 bidders submit last-minute bids, we show that rather
                 than seeking to bid as late as possible, a bidder
                 should try to be the first sniper to bid (i.e., it
                 should ``snipe before the snipers'').",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "bid increment; electronic commerce; Online auctions;
                 proxy bidding; sniping",
}

@Article{Serrano:2007:DSW,
  author =       "M. {\'A}ngeles Serrano and Ana Maguitman and
                 Mari{\'a}n Bogu{\~n}{\'a} and Santo Fortunato and
                 Alessandro Vespignani",
  title =        "Decoding the structure of the {WWW}: {A} comparative
                 analysis of {Web} crawls",
  journal =      j-TWEB,
  volume =       "1",
  number =       "2",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1255438.1255442",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The understanding of the immense and intricate
                 topological structure of the World Wide Web (WWW) is a
                 major scientific and technological challenge. This has
                 been recently tackled by characterizing the properties
                 of its representative graphs, in which vertices and
                 directed edges are identified with Web pages and
                 hyperlinks, respectively. Data gathered in large-scale
                 crawls have been analyzed by several groups resulting
                 in a general picture of the WWW that encompasses many
                 of the complex properties typical of rapidly evolving
                 networks. In this article, we report a detailed
                 statistical analysis of the topological properties of
                 four different WWW graphs obtained with different
                 crawlers. We find that, despite the very large size of
                 the samples, the statistical measures characterizing
                 these graphs differ quantitatively, and in some cases
                 qualitatively, depending on the domain analyzed and the
                 crawl used for gathering the data. This spurs the issue
                 of the presence of sampling biases and structural
                 differences of Web crawls that might induce properties
                 not representative of the actual global underlying
                 graph. In short, the stability of the widely accepted
                 statistical description of the Web is called into
                 question. In order to provide a more accurate
                 characterization of the Web graph, we study statistical
                 measures beyond the degree distribution, such as
                 degree-degree correlation functions or the statistics
                 of reciprocal connections. The latter appears to
                 enclose the relevant correlations of the WWW graph and
                 carry most of the topological information of the Web.
                 The analysis of this quantity is also of major interest
                 in relation to the navigability and searchability of
                 the Web.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "crawler biases; statistical analysis; Web graph
                 structure; Web measurement",
}

@Article{Reis:2007:BVD,
  author =       "Charles Reis and John Dunagan and Helen J. Wang and
                 Opher Dubrovsky and Saher Esmeir",
  title =        "{BrowserShield}: {Vulnerability}-driven filtering of
                 dynamic {HTML}",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281481",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Vulnerability-driven filtering of network data can
                 offer a fast and easy-to-deploy alternative or
                 intermediary to software patching, as exemplified in
                 Shield [Wang et al. 2004]. In this article, we take
                 Shield's vision to a new domain, inspecting and
                 cleansing not just static content, but also dynamic
                 content. The dynamic content we target is the dynamic
                 HTML in Web pages, which have become a popular vector
                 for attacks. The key challenge in filtering dynamic
                 HTML is that it is undecidable to statically determine
                 whether an embedded script will exploit the browser at
                 runtime. We avoid this undecidability problem by
                 rewriting web pages and any embedded scripts into safe
                 equivalents, inserting checks so that the filtering is
                 done at runtime. The rewritten pages contain logic for
                 recursively applying runtime checks to dynamically
                 generated or modified web content, based on known
                 vulnerabilities. We have built and evaluated {\em
                 BrowserShield}, a general framework that performs this
                 dynamic instrumentation of embedded scripts, and that
                 admits policies for customized runtime actions like
                 vulnerability-driven filtering. We also explore other
                 applications on top of BrowserShield.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "code rewriting; JavaScript; vulnerability; Web
                 browser",
}

@Article{Sun:2007:MDW,
  author =       "Zan Sun and Jalal Mahmud and I. V. Ramakrishnan and
                 Saikat Mukherjee",
  title =        "Model-directed {Web} transactions under constrained
                 modalities",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281482",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Online transactions (e.g., buying a book on the Web)
                 typically involve a number of steps spanning several
                 pages. Conducting such transactions under constrained
                 interaction modalities as exemplified by small screen
                 handhelds or interactive speech interfaces --- the
                 primary mode of communication for visually impaired
                 individuals --- is a strenuous, fatigue-inducing
                 activity. But usually one needs to browse only a small
                 fragment of a Web page to perform a transactional step
                 such as a form fillout, selecting an item from a search
                 results list, and so on. We exploit this observation to
                 develop an automata-based process model that delivers
                 only the ``relevant'' page fragments at each
                 transactional step, thereby reducing information
                 overload on such narrow interaction bandwidths. We
                 realize this model by coupling techniques from content
                 analysis of Web documents, automata learning and
                 statistical classification. The process model and
                 associated techniques have been incorporated into
                 Guide-O, a prototype system that facilitates online
                 transactions using speech/keyboard interface
                 (Guide-O-Speech), or with limited-display size
                 handhelds (Guide-O-Mobile). Performance of Guide-O and
                 its user experience are reported.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "assistive device; content adaptation; Web transaction",
}

@Article{Sharman:2007:CAD,
  author =       "Raj Sharman and Shiva Shankar Ramanna and Ram Ramesh
                 and Ram Gopal",
  title =        "Cache architecture for on-demand streaming on the
                 {Web}",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281483",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "On-demand streaming from a remote server through
                 best-effort Internet poses several challenges because
                 of network losses and variable delays. The primary
                 technique used to improve the quality of distributed
                 content service is replication. In the context of the
                 Internet, Web caching is the traditional mechanism that
                 is used. In this article we develop a new staged
                 delivery model for a distributed architecture in which
                 video is streamed from remote servers to edge caches
                 where the video is buffered and then streamed to the
                 client through a last-mile connection. The model uses a
                 novel revolving indexed cache buffer management
                 mechanism at the edge cache and employs selective
                 retransmissions of lost packets between the remote and
                 edge cache for a best-effort recovery of the losses.
                 The new Web cache buffer management scheme includes a
                 dynamic adjustment of cache buffer parameters based on
                 network conditions. In addition, performance of buffer
                 management and retransmission policies at the edge
                 cache is modeled and assessed using a probabilistic
                 analysis of the streaming process as well as system
                 simulations. The influence of different endogenous
                 control parameters on the quality of stream received by
                 the client is studied. Calibration curves on the QoS
                 metrics for different network conditions have been
                 obtained using simulations. Edge cache management can
                 be done using these calibration curves. ISPs can make
                 use of calibration curves to set the values of the
                 endogenous control parameters for specific QoS in
                 real-time streaming operations based on network
                 conditions. A methodology to benchmark transmission
                 characteristics using real-time traffic data is
                 developed to enable effective decision making on edge
                 cache buffer allocation and management strategies.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "buffering; edge cache; on-demand streaming; quality of
                 service; selective retransmissions; Web caching",
}

@Article{Zdun:2007:MPD,
  author =       "Uwe Zdun and Carsten Hentrich and Schahram Dustdar",
  title =        "Modeling process-driven and service-oriented
                 architectures using patterns and pattern primitives",
  journal =      j-TWEB,
  volume =       "1",
  number =       "3",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1281480.1281484",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:14 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Service-oriented architectures are increasingly used
                 in the context of business processes. However, the
                 proven practices for process-oriented integration of
                 services are not well documented yet. In addition,
                 modeling approaches for the integration of processes
                 and services are neither mature nor do they exactly
                 reflect the proven practices. In this article, we
                 propose a pattern language for process-oriented
                 integration of services to describe the proven
                 practices. Our main contribution is a modeling concept
                 based on pattern primitives for these patterns. A
                 pattern primitive is a fundamental, precisely specified
                 modeling element that represents a pattern. We present
                 a catalog of pattern primitives that are precisely
                 modeled using OCL constraints and map these primitives
                 to the patterns in the pattern language of
                 process-oriented integration of services. We also
                 present a model validation tool that we have developed
                 to support modeling the process-oriented integration of
                 services, and an industrial case study in which we have
                 applied our results.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "middleware; Service-oriented architecture; software
                 patterns",
}

@Article{Najork:2008:ISS,
  author =       "Marc Najork and Brian D. Davison",
  title =        "Introduction to special section on adversarial issues
                 in {Web} search",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326562",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Becchetti:2008:LAW,
  author =       "Luca Becchetti and Carlos Castillo and Debora Donato
                 and Ricardo Baeza-Yates and Stefano Leonardi",
  title =        "Link analysis for {Web} spam detection",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326563",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We propose link-based techniques for automatic
                 detection of Web spam, a term referring to pages which
                 use deceptive techniques to obtain undeservedly high
                 scores in search engines. The use of Web spam is
                 widespread and difficult to solve, mostly due to the
                 large size of the Web which means that, in practice,
                 many algorithms are infeasible.\par

                 We perform a statistical analysis of a large collection
                 of Web pages. In particular, we compute statistics of
                 the links in the vicinity of every Web page applying
                 rank propagation and probabilistic counting over the
                 entire Web graph in a scalable way. These statistical
                 features are used to build Web spam classifiers which
                 only consider the link structure of the Web, regardless
                 of page contents. We then present a study of the
                 performance of each of the classifiers alone, as well
                 as their combined performance, by testing them over a
                 large collection of Web link spam. After tenfold
                 cross-validation, our best classifiers have a
                 performance comparable to that of state-of-the-art spam
                 classifiers that use content attributes, but are
                 orthogonal to content-based methods.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "adversarial information retrieval; Link analysis",
}

@Article{Urvoy:2008:TWS,
  author =       "Tanguy Urvoy and Emmanuel Chauveau and Pascal Filoche
                 and Thomas Lavergne",
  title =        "Tracking {Web} spam with {HTML} style similarities",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326564",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Automatically generated content is ubiquitous in the
                 web: dynamic sites built using the three-tier paradigm
                 are good examples (e.g., commercial sites, blogs and
                 other sites edited using web authoring software), as
                 well as less legitimate spamdexing attempts (e.g., link
                 farms, faked directories).\par

                 Those pages built using the same generating method
                 (template or script) share a common ``look and feel''
                 that is not easily detected by common text
                 classification methods, but is more related to
                 stylometry.\par

                 In this work we study and compare several HTML style
                 similarity measures based on both textual and
                 extra-textual features in HTML source code. We also
                 propose a flexible algorithm to cluster a large
                 collection of documents according to these measures.
                 Since the proposed algorithm is based on locality
                 sensitive hashing (LSH), we first review this
                 technique.\par

                 We then describe how to use the HTML style similarity
                 clusters to pinpoint dubious pages and enhance the
                 quality of spam classifiers. We present an evaluation
                 of our algorithm on the WEBSPAM-UK2006 dataset.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Clustering; document similarity; search engine spam;
                 stylometry; templates identification",
}

@Article{Lin:2008:DST,
  author =       "Yu-Ru Lin and Hari Sundaram and Yun Chi and Junichi
                 Tatemura and Belle L. Tseng",
  title =        "Detecting splogs via temporal dynamics using
                 self-similarity analysis",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326565",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "This article addresses the problem of spam blog
                 (splog) detection using temporal and structural
                 regularity of content, post time and links. Splogs are
                 undesirable blogs meant to attract search engine
                 traffic, used solely for promoting affiliate sites.
                 Blogs represent popular online media, and splogs not
                 only degrade the quality of search engine results, but
                 also waste network resources. The splog detection
                 problem is made difficult due to the lack of stable
                 content descriptors.\par

                 We have developed a new technique for detecting splogs,
                 based on the observation that a blog is a dynamic,
                 growing sequence of entries (or posts) rather than a
                 collection of individual pages. In our approach, splogs
                 are recognized by their temporal characteristics and
                 content. There are three key ideas in our splog
                 detection framework. (a) We represent the blog temporal
                 dynamics using self-similarity matrices defined on the
                 histogram intersection similarity measure of the time,
                 content, and link attributes of posts, to investigate
                 the temporal changes of the post sequence. (b) We study
                 the blog temporal characteristics using a visual
                 representation derived from the self-similarity
                 measures. The visual signature reveals correlation
                 between attributes and posts, depending on the type of
                 blogs (normal blogs and splogs). (c) We propose two
                 types of novel temporal features to capture the splog
                 temporal characteristics. In our splog detector, these
                 novel features are combined with content based
                 features. We extract a content based feature vector
                 from blog home pages as well as from different parts of
                 the blog. The dimensionality of the feature vector is
                 reduced by Fisher linear discriminant analysis. We have
                 tested an SVM-based splog detector using proposed
                 features on real world datasets, with appreciable
                 results (90\% accuracy).",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Blogs; regularity; self-similarity; spam; splog
                 detection; temporal dynamics; topology",
}

@Article{Weinreich:2008:QAE,
  author =       "Harald Weinreich and Hartmut Obendorf and Eelco Herder
                 and Matthias Mayer",
  title =        "Not quite the average: {An} empirical study of {Web}
                 use",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326566",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In the past decade, the World Wide Web has been
                 subject to dramatic changes. Web sites have evolved
                 from static information resources to dynamic and
                 interactive applications that are used for a broad
                 scope of activities on a daily basis. To examine the
                 consequences of these changes on user behavior, we
                 conducted a long-term client-side Web usage study with
                 twenty-five participants. This report presents results
                 of this study and compares the user behavior with
                 previous long-term browser usage studies, which range
                 in age from seven to thirteen years. Based on the
                 empirical data and the interview results, various
                 implications for the interface design of browsers and
                 Web sites are discussed.\par

                 A major finding is the decreasing prominence of
                 backtracking in Web navigation. This can largely be
                 attributed to the increasing importance of dynamic,
                 service-oriented Web sites. Users do not navigate on
                 these sites searching for information, but rather
                 interact with an online application to complete certain
                 tasks. Furthermore, the usage of multiple windows and
                 tabs has partly replaced back button usage, posing new
                 challenges for user orientation and backtracking. We
                 found that Web browsing is a rapid activity even for
                 pages with substantial content, which calls for page
                 designs that allow for cursory reading. Click maps
                 provide additional information on how users interact
                 with the Web on page level. Finally, substantial
                 differences were observed between users, and
                 characteristic usage patterns for different types of
                 Web sites emphasize the need for more adaptive and
                 customizable Web browsers.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "browser interfaces; hypertext; Navigation; usability;
                 user study; Web; web browsing; web design; WWW",
}

@Article{Yu:2008:FWS,
  author =       "Qi Yu and Athman Bouguettaya",
  title =        "Framework for {Web} service query algebra and
                 optimization",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326567",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We present a query algebra that supports optimized
                 access of Web services through service-oriented
                 queries. The service query algebra is defined based on
                 a formal service model that provides a high-level
                 abstraction of Web services across an application
                 domain. The algebra defines a set of algebraic
                 operators. Algebraic service queries can be formulated
                 using these operators. This allows users to query their
                 desired services based on both functionality and
                 quality. We provide the implementation of each
                 algebraic operator. This enables the generation of
                 Service Execution Plans (SEPs) that can be used by
                 users to directly access services. We present an
                 optimization algorithm by extending the Dynamic
                 Programming (DP) approach to efficiently select the
                 SEPs with the best user-desired quality. The
                 experimental study validates the proposed algorithm by
                 demonstrating significant performance improvement
                 compared with the traditional DP approach.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "query optimization; service oriented computing;
                 service query; Web service",
}
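
%%% Editor's note on Yu:2008:FWS: the abstract describes selecting
%%% Service Execution Plans (SEPs) with an extended Dynamic
%%% Programming approach. A minimal, hypothetical Viterbi-style
%%% sketch of DP plan selection, assuming an additive quality model
%%% and a pairwise compatibility score `compat` between services of
%%% adjacent plan stages (both are assumptions of this note, not
%%% details taken from the paper):
%%%
%%%   def best_sep(stages, compat):
%%%       """stages: list of dicts {service: quality}, one per
%%%       operator in the plan; compat(a, b) scores chaining a
%%%       before b. Returns (best quality, best plan)."""
%%%       # dp maps a service to the best (quality, plan) ending in it
%%%       dp = {s: (q, [s]) for s, q in stages[0].items()}
%%%       for stage in stages[1:]:
%%%           nxt = {}
%%%           for s, q in stage.items():
%%%               p, (pq, plan) = max(
%%%                   dp.items(),
%%%                   key=lambda kv: kv[1][0] + compat(kv[0], s))
%%%               nxt[s] = (pq + compat(p, s) + q, plan + [s])
%%%           dp = nxt
%%%       return max(dp.values(), key=lambda v: v[0])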

@Article{Aleman-Meza:2008:SSA,
  author =       "Boanerges Aleman-Meza and Meenakshi Nagarajan and Li
                 Ding and Amit Sheth and I. Budak Arpinar and Anupam
                 Joshi and Tim Finin",
  title =        "Scalable semantic analytics on social networks for
                 addressing the problem of conflict of interest
                 detection",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326568",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In this article, we demonstrate the applicability of
                 semantic techniques for detection of Conflict of
                 Interest (COI). We explain the common challenges
                 involved in building scalable Semantic Web
                 applications, in particular those addressing
                 connecting-the-dots problems. We describe in detail the
                  challenges involved in two important aspects of
                 building Semantic Web applications, namely, data
                 acquisition and entity disambiguation (or reference
                  reconciliation). We extend our previous work where
                 we integrated the collaborative network of a subset of
                 DBLP researchers with persons in a Friend-of-a-Friend
                 social network (FOAF). Our method finds the connections
                 between people, measures collaboration strength, and
                 includes heuristics that use friendship/affiliation
                 information to provide an estimate of potential COI in
                 a peer-review scenario. Evaluations are presented by
                 measuring what could have been the COI between accepted
                 papers in various conference tracks and their
                 respective program committee members. The experimental
                  results demonstrate the scalability of the approach
                  on a dataset of over 3 million entities (all
                 bibliographic data from DBLP and a large collection of
                 FOAF documents).",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "conflict of interest; data fusion; DBLP; entity
                 disambiguation; ontologies; peer review process; RDF;
                 semantic analytics; semantic associations; Semantic
                 Web; social networks; swetoDblp",
}
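
%%% Editor's note on Aleman-Meza:2008:SSA: purely as an illustration
%%% of path-based COI heuristics (the paper's semantic analytics are
%%% considerably richer), a breadth-first sketch that flags a
%%% reviewer--author pair by their distance in a combined
%%% coauthorship/FOAF graph; the graph encoding and the depth
%%% thresholds are assumptions of this note:
%%%
%%%   from collections import deque
%%%
%%%   def coi_level(graph, reviewer, author, max_depth=2):
%%%       """graph: dict person -> iterable of connected persons."""
%%%       seen, queue = {reviewer}, deque([(reviewer, 0)])
%%%       while queue:
%%%           person, depth = queue.popleft()
%%%           if person == author:
%%%               return "high" if depth <= 1 else "medium"
%%%           if depth < max_depth:
%%%               for other in graph.get(person, ()):
%%%                   if other not in seen:
%%%                       seen.add(other)
%%%                       queue.append((other, depth + 1))
%%%       return "none"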

@Article{Gmach:2008:AQS,
  author =       "Daniel Gmach and Stefan Krompass and Andreas Scholz
                 and Martin Wimmer and Alfons Kemper",
  title =        "Adaptive quality of service management for enterprise
                 services",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326569",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In the past, enterprise resource planning systems were
                 designed as monolithic software systems running on
                 centralized mainframes. Today, these systems are
                 (re-)designed as a repository of enterprise services
                 that are distributed throughout the available computing
                 infrastructure. These service oriented architectures
                 (SOAs) require advanced automatic and adaptive
                  management concepts in order to achieve a high level
                  of service quality in terms of, for example,
                 availability, responsiveness, and throughput. The
                 adaptive management has to allocate service instances
                 to computing resources, adapt the resource allocation
                 to unforeseen load fluctuations, and intelligently
                 schedule individual requests to guarantee negotiated
                 service level agreements (SLAs). Our AutoGlobe platform
                 provides such a comprehensive adaptive service
                 management comprising\par

                 --- static service-to-server allocation based on
                 automatically detected service utilization
                 patterns,\par

                 --- adaptive service management based on a fuzzy
                 controller that remedies exceptional situations by
                 automatically initiating, for example, service
                 migration, service replication (scale-out), and\par

                 --- adaptive scheduling of individual service requests
                 that prioritizes requests depending on the current
                 degree of service level conformance.\par

                 All three complementary control components are
                 described in detail, and their effectiveness is
                 analyzed by means of realistic business application
                 scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "fuzzy controller; Quality of service; workload
                 characterization",
}
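
%%% Editor's note on Gmach:2008:AQS: a toy sketch of the kind of
%%% fuzzy-controller rule the abstract mentions ("IF utilization is
%%% high AND response time is slow THEN scale out"), with min() as
%%% the fuzzy AND. The membership functions and thresholds are
%%% invented for illustration and are not AutoGlobe's actual rules:
%%%
%%%   def mu_high(x, lo=0.6, hi=0.9):
%%%       # piecewise-linear membership degree for "high utilization"
%%%       return min(1.0, max(0.0, (x - lo) / (hi - lo)))
%%%
%%%   def scale_out_urgency(cpu_util, resp_time, slo=1.0):
%%%       # membership degree for "response time violates the SLA"
%%%       mu_slow = min(1.0, max(0.0, resp_time / slo - 1.0))
%%%       return min(mu_high(cpu_util), mu_slow)  # fuzzy AND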

@Article{Yang:2008:DGN,
  author =       "Bo Yang and Jiming Liu",
  title =        "Discovering global network communities based on local
                 centralities",
  journal =      j-TWEB,
  volume =       "2",
  number =       "1",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1326561.1326570",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:25 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "One of the central problems in studying and
                 understanding complex networks, such as online social
                  networks or the World Wide Web, is to discover hidden,
                 either physically (e.g., interactions or hyperlinks) or
                 logically (e.g., profiles or semantics) well-defined
                 topological structures. From a practical point of view,
                 a good example of such structures would be so-called
                 network communities. Earlier studies have introduced
                 various formulations as well as methods for the problem
                 of identifying or extracting communities. While each of
                 them has pros and cons as far as the effectiveness and
                 efficiency are concerned, almost none of them has
                 explicitly dealt with the potential relationship
                 between the global topological property of a network
                 and the local property of individual nodes. In order to
                 study this problem, this paper presents a new
                 algorithm, called ICS, which aims to discover natural
                  network communities from the local information of
                  nodes inherently hidden in networks, based on a new
                  centrality measure, clustering centrality, which
                  generalizes eigenvector centrality. Compared with
                  existing methods, our
                 method runs efficiently with a good clustering
                 performance. Additionally, it is insensitive to its
                 built-in parameters and prior knowledge.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "centrality; community mining; Complex network; graph
                 theory; World Wide Web",
}
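
%%% Editor's note on Yang:2008:DGN: the paper's clustering centrality
%%% generalizes eigenvector centrality; for reference, a standard
%%% power-iteration computation of plain eigenvector centrality on an
%%% adjacency matrix (this is background, not the ICS algorithm):
%%%
%%%   import numpy as np
%%%
%%%   def eigenvector_centrality(A, iters=100, tol=1e-9):
%%%       """A: square numpy adjacency matrix of an undirected,
%%%       connected graph."""
%%%       x = np.ones(A.shape[0]) / A.shape[0]
%%%       for _ in range(iters):
%%%           y = A @ x
%%%           y = y / np.linalg.norm(y)   # renormalize each step
%%%           if np.linalg.norm(y - x) < tol:
%%%               return y
%%%           x = y
%%%       return x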

@Article{Dustdar:2008:ISI,
  author =       "Schahram Dustdar and Bernd J. Kr{\"a}mer",
  title =        "Introduction to special issue on service oriented
                 computing {(SOC)}",
  journal =      j-TWEB,
  volume =       "2",
  number =       "2",
  pages =        "10:1--10:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1346237.1346238",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Belhajjame:2008:AAW,
  author =       "Khalid Belhajjame and Suzanne M. Embury and Norman W.
                 Paton and Robert Stevens and Carole A. Goble",
  title =        "Automatic annotation of {Web} services based on
                 workflow definitions",
  journal =      j-TWEB,
  volume =       "2",
  number =       "2",
  pages =        "11:1--11:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1346237.1346239",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Semantic annotations of web services can support the
                 effective and efficient discovery of services, and
                 guide their composition into workflows. At present,
                 however, the practical utility of such annotations is
                 limited by the small number of service annotations
                 available for general use. Manual annotation of
                 services is a time consuming and thus expensive task,
                 so some means are required by which services can be
                 automatically (or semi-automatically) annotated. In
                 this paper, we show how information can be inferred
                 about the semantics of operation parameters based on
                 their connections to other (annotated) operation
                 parameters within tried-and-tested workflows. Because
                 the data links in the workflows do not necessarily
                 contain every possible connection of compatible
                 parameters, we can infer only constraints on the
                 semantics of parameters. We show that despite their
                 imprecise nature these so-called {\em loose
                 annotations\/} are still of value in supporting the
                 manual annotation task, inspecting workflows and
                 discovering services. We also show that derived
                 annotations for already annotated parameters are
                 useful. By comparing existing and newly derived
                 annotations of operation parameters, we can support the
                 detection of errors in existing annotations, the
                 ontology used for annotation and in workflows. The
                 derivation mechanism has been implemented, and its
                 practical applicability for inferring new annotations
                 has been established through an experimental
                 evaluation. The usefulness of the derived annotations
                 is also demonstrated.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "automatic annotation; ontologies; semantic
                 annotations; Semantic web services; workflows",
}
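
%%% Editor's note on Belhajjame:2008:AAW: a minimal sketch of the
%%% core idea of deriving loose annotations by propagating known
%%% parameter annotations across workflow data links; the flat
%%% (source, target) link encoding is an assumption of this note:
%%%
%%%   from collections import defaultdict
%%%
%%%   def loose_annotations(links, known):
%%%       """links: iterable of (source_param, target_param) data
%%%       links from tried-and-tested workflows; known: dict
%%%       param -> ontology concept. Returns candidate concept
%%%       constraints for the parameters at the other end."""
%%%       inferred = defaultdict(set)
%%%       for src, dst in links:
%%%           if src in known:
%%%               inferred[dst].add(known[src])  # dst consumes src's concept
%%%           if dst in known:
%%%               inferred[src].add(known[dst])  # src feeds dst's concept
%%%       return dict(inferred)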

@Article{Elgedawy:2008:CAH,
  author =       "Islam Elgedawy and Zahir Tari and James A. Thom",
  title =        "Correctness-aware high-level functional matching
                 approaches for semantic {Web} services",
  journal =      j-TWEB,
  volume =       "2",
  number =       "2",
  pages =        "12:1--12:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1346237.1346240",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Service matching approaches trade precision for
                 recall, creating the need for users to choose the
                 correct services, which obviously is a major obstacle
                 for automating the service discovery and aggregation
                  processes. Our approach to overcoming this problem is to
                 eliminate the appearance of false positives by
                 returning only the correct services. As different users
                 have different semantics for what is correct, we argue
                 that the correctness of the matching results must be
                 determined according to the achievement of users'
                 goals: that only services achieving users' goals are
                 considered correct. To determine such correctness, we
                 argue that the matching process should be based
                 primarily on the high-level functional specifications
                 (namely goals, achievement contexts, and external
                 behaviors). In this article, we propose models, data
                 structures, algorithms, and theorems required to
                 correctly match such specifications. We propose a model
                 called $ G^+ $, to capture such specifications, for
                 both services and users, in a machine-understandable
                 format. We propose a data structure, called a Concepts
                 Substitutability Graph (CSG), to capture the
                 substitution semantics of application domain concepts
                 in a context-based manner, in order to determine the
                 semantic-preserving mapping transformations required to
                  match different $ G^+ $ models. We also propose a
                 behavior matching approach that is able to match states
                 in an m-to-n manner, such that behavior models with
                 different numbers of state transitions can be matched.
                 Finally, we show how services are matched and
                 aggregated according to their $ G^+ $ models. Results
                 of supporting experiments demonstrate the advantages of
                 the proposed service matching approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "High-level functional matching; semantic Web services;
                 service aggregation",
}

@Article{Ryu:2008:SDE,
  author =       "Seung Hwan Ryu and Fabio Casati and Halvard Skogsrud
                 and Boualem Benatallah and R{\'e}gis Saint-Paul",
  title =        "Supporting the dynamic evolution of {Web} service
                 protocols in service-oriented architectures",
  journal =      j-TWEB,
  volume =       "2",
  number =       "2",
  pages =        "13:1--13:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1346237.1346241",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In service-oriented architectures, everything is a
                 service and everyone is a service provider. Web
                 services (or simply services) are loosely coupled
                 software components that are published, discovered, and
                  invoked across the Web. As the use of Web services
                 grows, in order to correctly interact with them, it is
                 important to understand the business protocols that
                 provide clients with the information on how to interact
                 with services. In dynamic Web service environments,
                 service providers need to constantly adapt their
                  business protocols to reflect the restrictions and
                  requirements imposed by new applications, new business
                 strategies, and new laws, or for fixing problems found
                 in the protocol definition. However, the effective
                 management of such a protocol evolution raises critical
                 problems: one of the most critical issues is how to
                 handle instances running under the old protocol when it
                 has been changed. Simple solutions, such as aborting
                 them or allowing them to continue to run according to
                 the old protocol, can be considered, but they are
                 inapplicable for many reasons (for example, the loss of
                 work already done and the critical nature of work). In
                 this article, we present a framework that supports
                 service managers in managing the business protocol
                 evolution by providing several features, such as a
                  variety of protocol change impact analyses that
                  automatically determine which ongoing instances can
                  be migrated to the new version of the protocol, and
                  data mining techniques that infer interaction
                  patterns for classifying ongoing instances as
                  migratable to the new protocol. To support the
                  protocol evolution
                 process, we have also developed database-backed GUI
                 tools on top of our existing system. The proposed
                 approach and tools can help service managers in
                 managing the evolution of ongoing instances when the
                 business protocols of services with which they are
                 interacting have changed.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Business protocols; change impact analysis; decision
                 trees; dynamic evolution; ongoing instances; Web
                 services",
}
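
%%% Editor's note on Ryu:2008:SDE: one of the paper's impact analyses
%%% decides which ongoing instances can migrate to a changed
%%% protocol. A minimal sketch, assuming the protocol is given as a
%%% deterministic state machine and an instance as its message
%%% history (this encoding is an assumption of this note): an
%%% instance is trivially migratable if its past interaction replays
%%% on the new protocol.
%%%
%%%   def replayable(history, protocol, start):
%%%       """protocol: dict (state, message) -> next state."""
%%%       state = start
%%%       for message in history:
%%%           if (state, message) not in protocol:
%%%               return False   # past interaction invalid under new protocol
%%%           state = protocol[(state, message)]
%%%       return True            # instance can continue on the new protocol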

@Article{Schafer:2008:EFA,
  author =       "Michael Sch{\"a}fer and Peter Dolog and Wolfgang
                 Nejdl",
  title =        "An environment for flexible advanced compensations of
                 {Web} service transactions",
  journal =      j-TWEB,
  volume =       "2",
  number =       "2",
  pages =        "14:1--14:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1346237.1346242",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:47 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Business-to-business integration has recently been
                 performed by employing Web service environments.
                 Moreover, such environments are being provided by major
                  players in the technology market. Those environments
                 are based on open specifications for transaction
                 coordination. When a failure in such an environment
                 occurs, a compensation can be initiated to recover from
                 the failure. However, current environments have only
                 limited capabilities for compensations, and are usually
                 based on backward recovery. In this article, we
                 introduce an environment to deal with advanced
                 compensations based on forward recovery principles. We
                 extend the existing Web service transaction
                 coordination architecture and infrastructure in order
                 to support flexible compensation operations. We use a
                 contract-based approach, which allows the specification
                 of permitted compensations at runtime. We introduce
                 {\em abstract service\/} and {\em adapter\/}
                 components, which allow us to separate the compensation
                 logic from the coordination logic. In this way, we can
                 easily plug in or plug out different compensation
                 strategies based on a specification language defined on
                 top of basic compensation activities and complex
                 compensation types. Experiments with our approach and
                 environment show that such an approach to compensation
                 is feasible and beneficial. Additionally, we introduce
                 a cost-benefit model to evaluate the proposed
                 environment based on net value analysis. The evaluation
                 shows in which circumstances the environment is
                 economical.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "compensations; forward-recovery; transactions; Web
                 services",
}

@Article{Srivatsa:2008:MAL,
  author =       "Mudhakar Srivatsa and Arun Iyengar and Jian Yin and
                 Ling Liu",
  title =        "Mitigating application-level denial of service attacks
                 on {Web} servers: {A} client-transparent approach",
  journal =      j-TWEB,
  volume =       "2",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1377488.1377489",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Recently, we have seen increasing numbers of denial of
                 service (DoS) attacks against online services and Web
                 applications either for extortion reasons or for
                 impairing and even disabling the competition. These DoS
                 attacks have increasingly targeted the application
                 level. Application-level DoS attacks emulate the same
                 request syntax and network-level traffic
                 characteristics as those of legitimate clients, thereby
                 making the attacks much harder to detect and counter.
                 Moreover, such attacks often target bottleneck
                 resources such as disk bandwidth, database bandwidth,
                 and CPU resources. In this article, we propose handling
                 DoS attacks by using a twofold mechanism. First, we
                 perform admission control to limit the number of
                 concurrent clients served by the online service.
                 Admission control is based on port hiding that renders
                 the online service invisible to unauthorized clients by
                 hiding the port number on which the service accepts
                 incoming requests. Second, we perform congestion
                 control on admitted clients to allocate more resources
                 to good clients. Congestion control is achieved by
                 adaptively setting a client's priority level in
                 response to the client's requests in a way that can
                 incorporate application-level semantics. We present a
                 detailed evaluation of the proposed solution using two
                 sample applications: Apache HTTPD and the TPCW
                 benchmark (running on Apache Tomcat and IBM DB2). Our
                 experiments show that the proposed solution incurs low
                 performance overhead and is resilient to DoS attacks.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "client transparency; DoS Attacks; game theory; Web
                 servers",
}
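
%%% Editor's note on Srivatsa:2008:MAL: a hypothetical sketch of
%%% port hiding, the admission-control idea in the abstract: the
%%% service listens on a port that only authorized clients can
%%% compute. Deriving the port from an HMAC over a shared secret,
%%% the client identity, and a time window is this note's
%%% illustration, not necessarily the paper's exact construction:
%%%
%%%   import hashlib, hmac, time
%%%
%%%   def hidden_port(secret, client_id, window=300, lo=20000, hi=60000):
%%%       """secret: bytes shared with authorized clients; the port
%%%       changes every `window` seconds."""
%%%       epoch = int(time.time() // window)
%%%       tag = hmac.new(secret, f"{client_id}:{epoch}".encode(),
%%%                      hashlib.sha256).digest()
%%%       # map the MAC into the allowed port range
%%%       return lo + int.from_bytes(tag[:4], "big") % (hi - lo)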

@Article{White:2008:LPD,
  author =       "Ryen W. White and Mikhail Bilenko and Silviu
                 Cucerzan",
  title =        "Leveraging popular destinations to enhance {Web}
                 search interaction",
  journal =      j-TWEB,
  volume =       "2",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1377488.1377490",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "This article presents a novel Web search interaction
                 feature that for a given query provides links to Web
                 sites frequently visited by other users with similar
                 information needs. These popular destinations
                 complement traditional search results, allowing direct
                 navigation to authoritative resources for the query
                 topic. Destinations are identified using the history of
                 the search and browsing behavior of many users over an
                 extended time period, and their collective behavior
                 provides a basis for computing source authority. They
                 are drawn from the end of users' postquery browse
                 trails where users may cease searching once they find
                 relevant information. We describe a user study that
                 compared the suggestion of destinations with the
                 previously proposed suggestion of related queries as
                 well as with traditional, unaided Web search. Results
                 show that search enhanced by query suggestions
                 outperforms other systems in terms of subject
                 perceptions and search effectiveness for fact-finding
                 search tasks. However, search enhanced by destination
                 suggestions performs best for exploratory tasks with
                 its best performance obtained from mining past user
                 behavior at query-level granularity. We discuss the
                 implications of these and other findings from our study
                 for the design of search systems that utilize user
                 behavior, in particular, user browse trails and popular
                 destinations.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "enhanced Web search; search destinations; User
                 studies",
}
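
%%% Editor's note on White:2008:LPD: a minimal sketch of mining
%%% popular destinations from the ends of postquery browse trails,
%%% as the abstract describes; the trail encoding is an assumption
%%% of this note:
%%%
%%%   from collections import Counter, defaultdict
%%%
%%%   def popular_destinations(trails, k=5):
%%%       """trails: iterable of (query, [url, ...]) postquery
%%%       browse trails; returns the top-k trail-end pages per
%%%       query (query-level granularity)."""
%%%       ends = defaultdict(Counter)
%%%       for query, urls in trails:
%%%           if urls:
%%%               ends[query][urls[-1]] += 1   # trail end = destination
%%%       return {q: [u for u, _ in c.most_common(k)]
%%%               for q, c in ends.items()}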

@Article{Andreolini:2008:MFS,
  author =       "Mauro Andreolini and Sara Casolari and Michele
                 Colajanni",
  title =        "Models and framework for supporting runtime decisions
                 in {Web-based} systems",
  journal =      j-TWEB,
  volume =       "2",
  number =       "3",
  pages =        "17:1--17:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1377488.1377491",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:17:58 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Efficient management of distributed Web-based systems
                 requires several mechanisms that decide on request
                  dispatching, load balancing, admission control, and
                  request redirection. The algorithms behind these mechanisms
                 typically make fast decisions on the basis of the load
                  conditions of the system resources. The architectural
                 complexity and workloads characterizing most Web-based
                 services make it extremely difficult to deduce a
                 representative view of a resource load from collected
                 measures that show extreme variability even at
                 different time scales. Hence, any decision based on
                 instantaneous or average views of the system load may
                 lead to useless or even wrong actions. As an
                 alternative, we propose a two-phase strategy that first
                 aims to obtain a representative view of the load trend
                 from measured system values and then applies this
                 representation to support runtime decision systems. We
                 consider two classical problems behind decisions: how
                 to detect significant and nontransient load changes of
                 a system resource and how to predict its future load
                 behavior. The two-phase strategy is based on stochastic
                 functions that are characterized by a computational
                 complexity that is compatible with runtime decisions.
                 We describe, test, and tune the two-phase strategy by
                 considering as a first example a multitier Web-based
                 system that is subject to different classes of
                 realistic and synthetic workloads. Also, we integrate
                 the proposed strategy into a framework that we validate
                 by applying it to support runtime decisions in a
                 cluster Web system and in a locally distributed Network
                 Intrusion Detection System.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "distributed systems; load change detection; load
                 prediction; load representation; World Wide Web",
}
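
%%% Editor's note on Andreolini:2008:MFS: the paper builds its load
%%% representation from its own stochastic functions; as a stand-in
%%% illustration of the two-phase idea (smooth raw samples into a
%%% trend, then decide), this note uses a simple exponentially
%%% weighted moving average and a mean-shift test, both assumptions
%%% of this note:
%%%
%%%   import statistics
%%%
%%%   def load_trend(samples, alpha=0.2):
%%%       """Phase 1: smooth a nonempty list of noisy load measures."""
%%%       s, out = samples[0], []
%%%       for x in samples:
%%%           s = alpha * x + (1 - alpha) * s
%%%           out.append(s)
%%%       return out
%%%
%%%   def load_change(trend, window=10, delta=0.15):
%%%       """Phase 2: flag a nontransient change when the recent
%%%       trend drifts from the preceding window by more than delta."""
%%%       if len(trend) < 2 * window:
%%%           return False
%%%       old = statistics.mean(trend[-2 * window:-window])
%%%       new = statistics.mean(trend[-window:])
%%%       return abs(new - old) > delta * max(abs(old), 1e-9)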

@Article{Amitay:2008:ISI,
  author =       "Einat Amitay and Andrei Broder",
  title =        "Introduction to special issue on query log analysis:
                 {Technology} and ethics",
  journal =      j-TWEB,
  volume =       "2",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1409220.1409221",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cooper:2008:SQL,
  author =       "Alissa Cooper",
  title =        "A survey of query log privacy-enhancing techniques
                 from a policy perspective",
  journal =      j-TWEB,
  volume =       "2",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1409220.1409222",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "As popular search engines face the sometimes
                 conflicting interests of protecting privacy while
                 retaining query logs for a variety of uses, numerous
                 technical measures have been suggested to both enhance
                 privacy and preserve at least a portion of the utility
                 of query logs. This article seeks to assess seven of
                 these techniques against three sets of criteria: (1)
                 how well the technique protects privacy, (2) how well
                 the technique preserves the utility of the query logs,
                 and (3) how well the technique might be implemented as
                 a user control. A user control is defined as a
                 mechanism that allows individual Internet users to
                 choose to have the technique applied to their own query
                 logs.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "log; policy; Privacy; query; search",
}

@Article{Baeza-Yates:2008:DTO,
  author =       "Ricardo Baeza-Yates and Aristides Gionis and Flavio P.
                 Junqueira and Vanessa Murdock and Vassilis Plachouras
                 and Fabrizio Silvestri",
  title =        "Design trade-offs for search engine caching",
  journal =      j-TWEB,
  volume =       "2",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1409220.1409223",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In this article we study the trade-offs in designing
                 efficient caching systems for Web search engines. We
                 explore the impact of different approaches, such as
                 static vs. dynamic caching, and caching query results
                 vs. caching posting lists. Using a query log spanning a
                 whole year, we explore the limitations of caching and
                 we demonstrate that caching posting lists can achieve
                 higher hit rates than caching query answers. We propose
                 a new algorithm for static caching of posting lists,
                 which outperforms previous methods. We also study the
                 problem of finding the optimal way to split the static
                 cache between answers and posting lists. Finally, we
                 measure how the changes in the query log influence the
                 effectiveness of static caching, given our observation
                 that the distribution of the queries changes slowly
                 over time. Our results and observations are applicable
                 to different levels of the data-access hierarchy, for
                 instance, for a memory/disk layer or a broker/remote
                 server layer.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Caching; query logs; Web search",
}
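
%%% Editor's note on Baeza-Yates:2008:DTO: static caching of posting
%%% lists must weigh a term's query popularity against the space its
%%% list occupies. A knapsack-style greedy sketch that fills the
%%% cache by popularity per unit of size (a simplification by this
%%% note; the paper proposes and evaluates its own algorithm):
%%%
%%%   def static_posting_cache(freq, size, budget):
%%%       """freq: dict term -> query-log frequency; size: dict
%%%       term -> posting-list size; budget: cache capacity."""
%%%       chosen, used = [], 0
%%%       for t in sorted(freq, key=lambda t: freq[t] / size[t],
%%%                       reverse=True):
%%%           if used + size[t] <= budget:
%%%               chosen.append(t)
%%%               used += size[t]
%%%       return chosen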

@Article{Richardson:2008:LAW,
  author =       "Matthew Richardson",
  title =        "Learning about the world through long-term query
                 logs",
  journal =      j-TWEB,
  volume =       "2",
  number =       "4",
  pages =        "21:1--21:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1409220.1409224",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In this article, we demonstrate the value of long-term
                 query logs. Most work on query logs to date considers
                 only short-term (within-session) query information. In
                 contrast, we show that long-term query logs can be used
                 to learn about the world we live in. There are many
                 applications of this that lead not only to improving
                 the search engine for its users, but also potentially
                 to advances in other disciplines such as medicine,
                 sociology, economics, and more. In this article, we
                 will show how long-term query logs can be used for
                 these purposes, and that their potential is severely
                 reduced if the logs are limited to short time horizons.
                 We show that query effects are long-lasting, provide
                 valuable information, and might be used to
                 automatically make medical discoveries, build concept
                 hierarchies, and generally learn about the sociological
                 behavior of users. We believe these applications are
                 only the beginning of what can be done with the
                 information contained in long-term query logs, and see
                 this work as a step toward unlocking their potential.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "data mining; knowledge discovery; query logs; user
                 behavior",
}

@Article{Koutrika:2008:CST,
  author =       "Georgia Koutrika and Frans Adjie Effendi and
                  Zolt{\'a}n Gy{\"o}ngyi and Paul Heymann and Hector
                 Garcia-Molina",
  title =        "Combating spam in tagging systems: {An} evaluation",
  journal =      j-TWEB,
  volume =       "2",
  number =       "4",
  pages =        "22:1--22:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1409220.1409225",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:06 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Tagging systems allow users to interactively annotate
                 a pool of shared resources using descriptive strings
                 called {\em tags}. Tags are used to guide users to
                 interesting resources and help them build communities
                 that share their expertise and resources. As tagging
                 systems are gaining in popularity, they become more
                 susceptible to {\em tag spam\/}: misleading tags that
                 are generated in order to increase the visibility of
                 some resources or simply to confuse users. Our goal is
                 to understand this problem better. In particular, we
                 are interested in answers to questions such as: How
                 many malicious users can a tagging system tolerate
                 before results significantly degrade? What types of
                 tagging systems are more vulnerable to malicious
                 attacks? What would be the effort and the impact of
                 employing a trusted moderator to find bad postings? Can
                 a system automatically protect itself from spam, for
                 instance, by exploiting user tag patterns? In a quest
                 for answers to these questions, we introduce a
                 framework for modeling tagging systems and user tagging
                 behavior. We also describe a method for ranking
                 documents matching a tag based on taggers' reliability.
                 Using our framework, we study the behavior of existing
                 approaches under malicious attacks and the impact of a
                 moderator and our ranking method.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "bookmarking systems; tag spam; Tagging; tagging
                 models",
}
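
%%% Editor's note on Koutrika:2008:CST: the abstract mentions ranking
%%% documents that match a tag by taggers' reliability. A minimal
%%% sketch under the assumption (this note's, not the paper's exact
%%% model) that a tagger is reliable to the extent other users repeat
%%% their (document, tag) postings:
%%%
%%%   from collections import defaultdict
%%%
%%%   def tagger_reliability(postings):
%%%       """postings: list of (user, doc, tag) triples."""
%%%       support = defaultdict(set)
%%%       for user, doc, tag in postings:
%%%           support[(doc, tag)].add(user)
%%%       scores = defaultdict(list)
%%%       for user, doc, tag in postings:
%%%           # a posting is "confirmed" if someone else repeats it
%%%           scores[user].append(1.0 if len(support[(doc, tag)]) > 1
%%%                               else 0.0)
%%%       return {u: sum(v) / len(v) for u, v in scores.items()}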

@Article{Rattenbury:2009:MEP,
  author =       "Tye Rattenbury and Mor Naaman",
  title =        "Methods for extracting place semantics from {Flickr}
                 tags",
  journal =      j-TWEB,
  volume =       "3",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1462148.1462149",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We describe an approach for extracting semantics for
                 tags, unstructured text-labels assigned to resources on
                 the Web, based on each tag's usage patterns. In
                 particular, we focus on the problem of extracting place
                 semantics for tags that are assigned to photos on
                  Flickr, a popular photo-sharing Web site that supports
                 location (latitude/longitude) metadata for photos. We
                 propose the adaptation of two baseline methods,
                 inspired by well-known burst-analysis techniques, for
                 the task; we also describe two novel methods, TagMaps
                 and scale-structure identification. We evaluate the
                 methods on a subset of Flickr data. We show that our
                 scale-structure identification method outperforms
                 existing techniques and that a hybrid approach
                 generates further improvements (achieving 85\%
                 precision at 81\% recall). The approach and methods
                 described in this work can be used in other domains
                 such as geo-annotated Web pages, where text terms can
                 be extracted and associated with usage patterns.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "places; semantics; Tagging systems; tags",
}
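
%%% Editor's note on Rattenbury:2009:MEP: the methods test whether a
%%% tag's usage "bursts" at some spatial scale. A deliberately naive
%%% grid-based stand-in (not the paper's TagMaps or scale-structure
%%% identification): call a tag a place tag if a large share of its
%%% geotagged uses falls into a single cell; cell size and threshold
%%% are assumptions of this note:
%%%
%%%   from collections import Counter
%%%
%%%   def is_place_tag(points, cell_deg=0.1, threshold=0.5):
%%%       """points: nonempty list of (lat, lon) uses of one tag."""
%%%       cells = Counter((int(lat / cell_deg), int(lon / cell_deg))
%%%                       for lat, lon in points)
%%%       return cells.most_common(1)[0][1] / len(points) >= threshold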

@Article{Jackson:2009:PBD,
  author =       "Collin Jackson and Adam Barth and Andrew Bortz and
                 Weidong Shao and Dan Boneh",
  title =        "Protecting browsers from {DNS} rebinding attacks",
  journal =      j-TWEB,
  volume =       "3",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1462148.1462150",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "DNS rebinding attacks subvert the same-origin policy
                 of browsers, converting them into open network proxies.
                 Using DNS rebinding, an attacker can circumvent
                 organizational and personal firewalls, send spam email,
                 and defraud pay-per-click advertisers. We evaluate the
                 cost effectiveness of mounting DNS rebinding attacks,
                 finding that an attacker requires less than \$100 to
                 hijack 100,000 IP addresses. We analyze defenses to DNS
                 rebinding attacks, including improvements to the
                 classic ``DNS pinning,'' and recommend changes to
                 browser plug-ins, firewalls, and Web servers. Our
                 defenses have been adopted by plug-in vendors and by a
                 number of open-source firewall implementations.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "click fraud; DNS; firewall; Same-origin policy; spam",
}
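
%%% Editor's note on Jackson:2009:PBD: the classic "DNS pinning"
%%% defense discussed in the abstract, in a deliberately simplified
%%% sketch: a resolver that keeps using the first address it saw for
%%% a host, so a later rebinding of the name to an internal IP has
%%% no effect within the session:
%%%
%%%   import socket
%%%
%%%   _pins = {}
%%%
%%%   def pinned_resolve(host):
%%%       """Resolve host once and pin the result for this session."""
%%%       if host not in _pins:
%%%           _pins[host] = socket.gethostbyname(host)
%%%       return _pins[host]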

@Article{Bar-Yossef:2009:DCD,
  author =       "Ziv Bar-Yossef and Idit Keidar and Uri Schonfeld",
  title =        "Do not crawl in the {DUST}: {Different URLs with
                 Similar Text}",
  journal =      j-TWEB,
  volume =       "3",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1462148.1462151",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We consider the problem of DUST: Different URLs with
                 Similar Text. Such duplicate URLs are prevalent in Web
                 sites, as Web server software often uses aliases and
                 redirections, and dynamically generates the same page
                 from various different URL requests. We present a novel
                 algorithm, {\em DustBuster}, for uncovering DUST; that
                 is, for discovering rules that transform a given URL to
                 others that are likely to have similar content.
                 DustBuster mines DUST effectively from previous crawl
                 logs or Web server logs, {\em without\/} examining page
                 contents. Verifying these rules via sampling requires
                  fetching only a few actual Web pages. Search engines can
                 benefit from information about DUST to increase the
                 effectiveness of crawling, reduce indexing overhead,
                 and improve the quality of popularity statistics such
                 as PageRank.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "antialiasing; crawling; duplicate detection; Search
                 engines; URL normalization",
}
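
%%% Editor's note on Bar-Yossef:2009:DCD: DustBuster's output is a
%%% set of rules that transform a URL into likely-duplicate URLs. In
%%% the simplest case such a rule is a substring substitution, as in
%%% this sketch (mining the rules themselves is the paper's
%%% contribution and is not shown here):
%%%
%%%   def apply_dust_rule(url, rule):
%%%       """rule: (alpha, beta) substring substitution."""
%%%       alpha, beta = rule
%%%       return url.replace(alpha, beta, 1)
%%%
%%%   # e.g. apply_dust_rule("http://site/a/index.html",
%%%   #                      ("/index.html", "/"))
%%%   # -> "http://site/a/", a URL likely to serve similar text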

@Article{Xiao:2009:BSD,
  author =       "Xiangye Xiao and Qiong Luo and Dan Hong and Hongbo Fu
                 and Xing Xie and Wei-Ying Ma",
  title =        "Browsing on small displays by transforming {Web} pages
                 into hierarchically structured subpages",
  journal =      j-TWEB,
  volume =       "3",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1462148.1462152",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:15 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We propose a new Web page transformation method to
                 facilitate Web browsing on handheld devices such as
                 Personal Digital Assistants (PDAs). In our approach, an
                 original Web page that does not fit on the screen is
                 transformed into a set of subpages, each of which fits
                 on the screen. This transformation is done through
                 slicing the original page into page blocks iteratively,
                 with several factors considered. These factors include
                 the size of the screen, the size of each page block,
                 the number of blocks in each transformed page, the
                 depth of the tree hierarchy that the transformed pages
                 form, as well as the semantic coherence between blocks.
                 We call the tree hierarchy of the transformed pages an
                 SP-tree. In an SP-tree, an internal node consists of a
                 textually enhanced thumbnail image with hyperlinks, and
                 a leaf node is a block extracted from a subpage of the
                 original Web page. We adaptively adjust the fanout and
                 the height of the SP-tree so that each thumbnail image
                 is clear enough for users to read, while at the same
                 time, the number of clicks needed to reach a leaf page
                 is few. Through this transformation algorithm, we
                 preserve the contextual information in the original Web
                 page and reduce scrolling. We have implemented this
                 transformation module on a proxy server and have
                 conducted usability studies on its performance. Our
                 system achieved a shorter task completion time compared
                 with that of transformations from the Opera browser in
                 nine of ten tasks. The average improvement on familiar
                 pages was 44\%. The average improvement on unfamiliar
                 pages was 37\%. Subjective responses were positive.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Proxy; slicing tree; small displays; thumbnails; Web
                 browsing; Web page adaptation",
}
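
%%% Editor's note on Xiao:2009:BSD: a much-simplified sketch of
%%% slicing a page into an SP-tree, considering only block heights
%%% against the screen capacity (the paper additionally weighs
%%% semantic coherence, fanout, and thumbnail legibility):
%%%
%%%   def build_sp_tree(blocks, capacity):
%%%       """blocks: list of (block_id, height); returns a tree of
%%%       ('leaf', ids) pages under ('thumbnail', children) nodes."""
%%%       if sum(h for _, h in blocks) <= capacity or len(blocks) == 1:
%%%           return ("leaf", [b for b, _ in blocks])
%%%       groups, current, used = [], [], 0
%%%       for block, height in blocks:   # slice into screen-sized runs
%%%           if current and used + height > capacity:
%%%               groups.append(current)
%%%               current, used = [], 0
%%%           current.append((block, height))
%%%           used += height
%%%       groups.append(current)
%%%       return ("thumbnail",
%%%               [build_sp_tree(g, capacity) for g in groups])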

@Article{Gabrilovich:2009:CSQ,
  author =       "Evgeniy Gabrilovich and Andrei Broder and Marcus
                 Fontoura and Amruta Joshi and Vanja Josifovski and
                 Lance Riedel and Tong Zhang",
  title =        "Classifying search queries using the {Web} as a source
                 of knowledge",
  journal =      j-TWEB,
  volume =       "3",
  number =       "2",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1513876.1513877",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:23 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We propose a methodology for building a robust query
                 classification system that can identify thousands of
                 query classes, while dealing in real time with the
                 query volume of a commercial Web search engine. We use
                 a pseudo relevance feedback technique: given a query,
                 we determine its topic by classifying the Web search
                 results retrieved by the query. Motivated by the needs
                 of search advertising, we primarily focus on rare
                 queries, which are the hardest from the point of view
                 of machine learning, yet in aggregate account for a
                 considerable fraction of search engine traffic.
                 Empirical evaluation confirms that our methodology
                 yields a considerably higher classification accuracy
                 than previously reported. We believe that the proposed
                 methodology will lead to better matching of online ads
                 to rare queries and overall to a better user
                 experience.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Pseudo relevance feedback; query classification; Web
                 search",
}
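
%%% Editor's note on Gabrilovich:2009:CSQ: the pseudo relevance
%%% feedback loop from the abstract, reduced to a sketch: classify
%%% the query's top search results and vote. `search` and
%%% `classify_page` are assumed callables standing in for the search
%%% engine and the document classifier:
%%%
%%%   from collections import Counter
%%%
%%%   def classify_query(query, search, classify_page, k=40):
%%%       """Label a (possibly rare) query by voting over the
%%%       classes of its top-k Web search results."""
%%%       votes = Counter(classify_page(page)
%%%                       for page in search(query)[:k])
%%%       return votes.most_common(1)[0][0] if votes else None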

@Article{Reay:2009:LSE,
  author =       "Ian Reay and Scott Dick and James Miller",
  title =        "A large-scale empirical study of {P3P} privacy
                 policies: {Stated} actions vs. legal obligations",
  journal =      j-TWEB,
  volume =       "3",
  number =       "2",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1513876.1513878",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:23 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Numerous studies over the past ten years have shown
                 that concern for personal privacy is a major impediment
                 to the growth of e-commerce. These concerns are so
                  serious that most, if not all, consumer watchdog groups
                 have called for some form of privacy protection for
                 Internet users. In response, many nations around the
                 world, including all European Union nations, Canada,
                 Japan, and Australia, have enacted national legislation
                 establishing mandatory safeguards for personal privacy.
                 However, recent evidence indicates that Web sites might
                 not be adhering to the requirements of this
                 legislation. The goal of this study is to examine the
                 posted privacy policies of Web sites, and compare these
                 statements to the legal mandates under which the Web
                 sites operate. We harvested all available P3P (Platform
                 for Privacy Preferences Protocol) documents from the
                 100,000 most popular Web sites (over 3,000 full
                 policies, and another 3,000 compact policies). This
                 allows us to undertake an automated analysis of
                 adherence to legal mandates on Web sites that most
                 impact the average Internet user. Our findings show
                 that Web sites generally do not even claim to follow
                 all the privacy-protection mandates in their legal
                 jurisdiction (we do not examine actual practice, only
                 posted policies). Furthermore, this general statement
                 appears to be true for every jurisdiction with privacy
                 laws and any significant number of P3P policies,
                 including European Union nations, Canada, Australia,
                 and Web sites in the USA Safe Harbor program.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "electronic commerce; legislation and enforcement; P3P;
                 Privacy protection",
}
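
%%% Editor's note on Reay:2009:LSE: P3P policy reference files live
%%% at a well-known location on the site root, which is what makes
%%% harvesting at this scale automatable. A minimal fetch sketch:
%%%
%%%   import urllib.request
%%%
%%%   def fetch_p3p_reference(host, timeout=10):
%%%       """Fetch /w3c/p3p.xml, P3P's well-known location;
%%%       returns the XML bytes, or None on any network error."""
%%%       url = f"http://{host}/w3c/p3p.xml"
%%%       try:
%%%           with urllib.request.urlopen(url, timeout=timeout) as resp:
%%%               return resp.read()
%%%       except OSError:
%%%           return None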

@Article{Dourisboure:2009:ECD,
  author =       "Yon Dourisboure and Filippo Geraci and Marco
                 Pellegrini",
  title =        "Extraction and classification of dense implicit
                 communities in the {Web} graph",
  journal =      j-TWEB,
  volume =       "3",
  number =       "2",
  pages =        "7:1--7:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1513876.1513879",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Fri Apr 24 18:18:23 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The World Wide Web (WWW) is rapidly becoming important
                 for society as a medium for sharing data, information,
                 and services, and there is a growing interest in tools
                 for understanding collective behavior and emerging
                 phenomena in the WWW. In this article we focus on the
                 problem of searching and classifying {\em
                 communities\/} in the Web. Loosely speaking a community
                 is a group of pages related to a common interest. More
                 formally, communities have been associated in the
                 computer science literature with the existence of a
                 locally dense subgraph of the Web graph (where Web
                 pages are nodes and hyperlinks are arcs of the Web
                 graph). The core of our contribution is a new scalable
                 algorithm for finding relatively dense subgraphs in
                 massive graphs. We apply our algorithm on Web graphs
                 built on three publicly available large crawls of the
                 Web (with raw sizes up to 120M nodes and 1G arcs). The
                 effectiveness of our algorithm in finding dense
                 subgraphs is demonstrated experimentally by embedding
                 artificial communities in the Web graph and counting
                 how many of these are blindly found. Effectiveness
                 increases with the size and density of the communities:
                 it is close to 100\% for communities of thirty nodes or
                 more (even at low density). It is still about 80\% even
                 for communities of twenty nodes with density over 50\%
                 of the arcs present. At the lower extreme, the
                 algorithm catches 35\% of dense communities made of ten
                 nodes. We also develop some sufficient conditions for
                 the detection of a community under some local graph
                 models and not-too-restrictive hypotheses. We complete
                 our {\em Community Watch\/} system by clustering the
                 communities found in the Web graph into homogeneous
                 groups by topic and labeling each group by
                 representative keywords.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "communities; detection of dense subgraph; Web graph",
}
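
%%% The authors' scalable algorithm is not reproduced in the abstract;
%%% as a point of comparison only, the classic greedy "peeling"
%%% heuristic for dense-subgraph extraction (Charikar's
%%% 2-approximation) fits in a short Python sketch. Illustrative, not
%%% the paper's method.
%%%
%%%   import heapq
%%%
%%%   def densest_subgraph(adj):
%%%       """adj: dict node -> set of neighbors (undirected graph).
%%%       Greedily peel minimum-degree nodes; return the surviving set
%%%       with the best edges-per-node density seen along the way."""
%%%       deg = {u: len(vs) for u, vs in adj.items()}
%%%       heap = [(d, u) for u, d in deg.items()]
%%%       heapq.heapify(heap)
%%%       alive = set(adj)
%%%       edges = sum(deg.values()) // 2
%%%       best, best_density = set(alive), edges / max(len(alive), 1)
%%%       while alive:
%%%           d, u = heapq.heappop(heap)
%%%           if u not in alive or d != deg[u]:
%%%               continue                     # stale heap entry
%%%           alive.remove(u)
%%%           edges -= deg[u]
%%%           for v in adj[u]:
%%%               if v in alive:
%%%                   deg[v] -= 1
%%%                   heapq.heappush(heap, (deg[v], v))
%%%           if alive and edges / len(alive) > best_density:
%%%               best_density, best = edges / len(alive), set(alive)
%%%       return best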

@Article{Lee:2009:ISB,
  author =       "Hsin-Tsang Lee and Derek Leonard and Xiaoming Wang and
                 Dmitri Loguinov",
  title =        "{IRLbot}: {Scaling} to 6 billion pages and beyond",
  journal =      j-TWEB,
  volume =       "3",
  number =       "3",
  pages =        "8:1--8:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1541822.1541823",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "This article shares our experience in designing a Web
                 crawler that can download billions of pages using a
                 single-server implementation and models its
                 performance. We first show that current crawling
                 algorithms cannot effectively cope with the sheer
                 volume of URLs generated in large crawls, highly
                 branching spam, legitimate multimillion-page blog
                 sites, and infinite loops created by server-side
                 scripts. We then offer a set of techniques for dealing
                 with these issues and test their performance in an
                 implementation we call IRLbot. In our recent experiment
                 that lasted 41 days, IRLbot running on a single server
                 successfully crawled 6.3 billion valid HTML pages (7.6
                 billion connection requests) and sustained an average
                 download rate of 319 Mb/s (1,789 pages/s). Unlike our
                 prior experiments with algorithms proposed in related
                 work, this version of IRLbot did not experience any
                 bottlenecks and successfully handled content from over
                 117 million hosts, parsed out 394 billion links, and
                 discovered a subset of the Web graph with 41 billion
                 unique nodes.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "crawling; IRLbot; large scale",
}
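
%%% One of the abstract's central points, that a naive in-memory
%%% seen-URL test breaks down at billions of URLs, is addressed in the
%%% paper with a disk-based batched structure (DRUM). A deliberately
%%% simplified Python caricature of the batching idea, not IRLbot's
%%% code:
%%%
%%%   import hashlib, os
%%%
%%%   def url_key(url):
%%%       return hashlib.sha1(url.encode()).digest()[:8]
%%%
%%%   def filter_new(batch, store="seen.keys"):
%%%       """Check a large buffered batch of URL keys against the store
%%%       in one sequential pass, append the new ones, and return them.
%%%       (The real DRUM streams the store rather than loading it.)"""
%%%       seen = set()
%%%       if os.path.exists(store):
%%%           with open(store, "rb") as f:
%%%               while True:
%%%                   chunk = f.read(8)
%%%                   if not chunk:
%%%                       break
%%%                   seen.add(chunk)
%%%       new = [k for k in sorted(set(batch)) if k not in seen]
%%%       with open(store, "ab") as f:
%%%           for k in new:
%%%               f.write(k)
%%%       return new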

@Article{Tappenden:2009:CDS,
  author =       "Andrew F. Tappenden and James Miller",
  title =        "Cookies: {A} deployment study and the testing
                 implications",
  journal =      j-TWEB,
  volume =       "3",
  number =       "3",
  pages =        "9:1--9:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1541822.1541824",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The results of an extensive investigation of cookie
                 deployment amongst 100,000 Internet sites are
                 presented. Cookie deployment is found to be approaching
                 universal levels, and hence there is an associated
                 need for relevant Web and software engineering
                 processes, specifically testing strategies that
                 actively consider cookies. The semi-automated
                 investigation demonstrates that over two-thirds of the
                 sites studied deploy cookies. The investigation
                 specifically examines the use of first-party,
                 third-party, sessional, and persistent cookies within
                 Web-based applications, identifying the presence of a
                 P3P policy and dynamic Web technologies as major
                 predictors of cookie usage. The results are juxtaposed
                 with the lack of testing strategies present in the
                 literature. A number of real-world examples, including
                 two case studies, are presented, further accentuating
                 the need for comprehensive testing strategies for
                 Web-based applications. The use of antirandom test case
                 generation is explored with respect to the testing
                 issues discussed. Finally, a number of seeding vectors
                 are presented, providing a basis for testing cookies
                 within Web-based applications.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Cookies; Internet browser; software testing; Web
                 engineering; Web technologies",
}
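
%%% A minimal sketch (Python; hypothetical, not the study's instrument)
%%% of the kind of probe such a deployment survey rests on: fetch a
%%% site's front page and classify each cookie it sets as sessional or
%%% persistent, one of the distinctions the article examines.
%%%
%%%   import requests
%%%
%%%   def classify_cookies(domain):
%%%       r = requests.get("http://%s/" % domain, timeout=10)
%%%       # A cookie without an expiry lives only for the session.
%%%       return [(c.name, "persistent" if c.expires else "sessional")
%%%               for c in r.cookies]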

@Article{Comuzzi:2009:FQB,
  author =       "Marco Comuzzi and Barbara Pernici",
  title =        "A framework for {QoS}-based {Web} service
                 contracting",
  journal =      j-TWEB,
  volume =       "3",
  number =       "3",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1541822.1541825",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The extensive adoption of Web service-based
                 applications in dynamic business scenarios, such as
                 on-demand computing or highly reconfigurable virtual
                 enterprises, calls for methods and tools for the
                 management of Web service nonfunctional aspects, such
                 as Quality of Service (QoS). Concerning contracts on
                 Web service QoS, the literature has mostly focused on
                 the contract definition and on mechanisms for contract
                 enactment, such as the monitoring of the satisfaction
                 of negotiated QoS guarantees. In this context, this
                 article proposes a framework for the automation of the
                 Web service contract specification and establishment.
                 An extensible model for defining both domain-dependent
                 and domain-independent Web service QoS dimensions and a
                 method for the automation of the contract establishment
                 phase are proposed. We describe a matchmaking algorithm
                 for the ranking of functionally equivalent services,
                 which orders services on the basis of their ability to
                 fulfill the service requestor's requirements while
                 keeping the price below a specified budget. We also
                 provide an algorithm for the configuration of the
                 negotiable part of the QoS Service-Level Agreement
                 (SLA), which is used to configure the agreement with
                 the top-ranked service identified in the matchmaking
                 phase. Experimental results show that, from a
                 utility-theory perspective, the contract establishment phase
                 leads to efficient outcomes. We envision two advanced
                 application scenarios for the Web service contracting
                 framework proposed in this article. First, it can be
                 used to enhance Web services' self-healing properties in
                 reaction to QoS-related service failures; second, it
                 can be exploited in process optimization for the online
                 reconfiguration of candidate Web services' QoS SLAs.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "matchmaking; negotiation; QoS; service selection; SLA;
                 Web service",
}
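
%%% The ranking step named in the abstract can be pictured with a small
%%% Python sketch: order functionally equivalent offers by a weighted
%%% utility over normalized QoS dimensions, keeping only those within
%%% the requestor's budget. The additive utility form and the weights
%%% are illustrative assumptions, not the paper's model.
%%%
%%%   def rank_services(offers, weights, budget):
%%%       """offers: dicts with 'price' plus QoS attributes normalized
%%%       to [0, 1], higher = better. Returns a best-first list."""
%%%       affordable = [o for o in offers if o["price"] <= budget]
%%%       def utility(o):
%%%           return sum(w * o[dim] for dim, w in weights.items())
%%%       return sorted(affordable, key=utility, reverse=True)
%%%
%%%   offers = [
%%%       {"name": "A", "price": 10, "availability": 0.99, "throughput": 0.7},
%%%       {"name": "B", "price": 8,  "availability": 0.95, "throughput": 0.9},
%%%   ]
%%%   rank_services(offers, {"availability": 0.6, "throughput": 0.4}, budget=9)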

@Article{Pilioura:2009:UPD,
  author =       "Thomi Pilioura and Aphrodite Tsalgatidou",
  title =        "Unified publication and discovery of semantic {Web}
                 services",
  journal =      j-TWEB,
  volume =       "3",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1541822.1541826",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:38 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The challenge of publishing and discovering Web
                 services has recently received lots of attention.
                 Various solutions to this problem have been proposed
                 which, apart from their offered advantages, suffer from
                 the following disadvantages: (i) most of them are
                 syntax-based, leading to poor precision and recall;
                 (ii) they do not scale to large numbers of services;
                 and (iii) they are mutually incompatible, resulting in
                 cumbersome service publication and
                 discovery. This article presents the principles, the
                 functionality, and the design of PYRAMID-S which
                 addresses these disadvantages by providing a scalable
                 framework for unified publication and discovery of
                 semantically enhanced services over heterogeneous
                 registries. PYRAMID-S uses a hybrid peer-to-peer
                 topology to organize Web service registries based on
                 domains. In such a topology, each Registry retains its
                 autonomy, meaning that it can use the publication and
                 discovery mechanisms as well as the ontology of its
                 choice. The viability of this approach is demonstrated
                 through the implementation and experimental analysis of
                 a prototype.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "evaluation; PYRAMID-S; scalable; semantic Web
                 services; unified; Web service discovery; Web service
                 publication",
}

@Article{Golbeck:2009:TNP,
  author =       "Jennifer Golbeck",
  title =        "Trust and nuanced profile similarity in online social
                 networks",
  journal =      j-TWEB,
  volume =       "3",
  number =       "4",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1594173.1594174",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:43 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Singh:2009:SSO,
  author =       "Aameek Singh and Mudhakar Srivatsa and Ling Liu",
  title =        "Search-as-a-service: {Outsourced} search over
                 outsourced storage",
  journal =      j-TWEB,
  volume =       "3",
  number =       "4",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1594173.1594175",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:43 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Robu:2009:ECS,
  author =       "Valentin Robu and Harry Halpin and Hana Shepherd",
  title =        "Emergence of consensus and shared vocabularies in
                 collaborative tagging systems",
  journal =      j-TWEB,
  volume =       "3",
  number =       "4",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1594173.1594176",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:43 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zheng:2010:UTM,
  author =       "Yu Zheng and Yukun Chen and Quannan Li and Xing Xie
                 and Wei-Ying Ma",
  title =        "Understanding transportation modes based on {GPS} data
                 for {Web} applications",
  journal =      j-TWEB,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1658373.1658374",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Li:2010:DSO,
  author =       "Guoli Li and Vinod Muthusamy and Hans-Arno Jacobsen",
  title =        "A distributed service-oriented architecture for
                 business process execution",
  journal =      j-TWEB,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1658373.1658375",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Montali:2010:DSV,
  author =       "Marco Montali and Maja Pesic and Wil M. P. van der
                 Aalst and Federico Chesani and Paola Mello and Sergio
                 Storari",
  title =        "Declarative specification and verification of service
                 choreographies",
  journal =      j-TWEB,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1658373.1658376",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Mar 16 09:28:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Almishari:2010:APD,
  author =       "Mishari Almishari and Xiaowei Yang",
  title =        "Ads-portal domains: {Identification} and
                 measurements",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "4:1--4:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1734200.1734201",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "An ads-portal domain refers to a Web domain that shows
                 only advertisements, served by a third-party
                 advertisement syndication service, in the form of an ads
                 listing. We develop a machine-learning-based classifier
                 to identify ads-portal domains, which has 96\%
                 accuracy. We use this classifier to measure the
                 prevalence of ads-portal domains on the Internet.
                 Surprisingly, 28.3\%/25\% of the (two-level)
                 {\tt *.com}/{\tt *.net} Web domains are ads-portal
                 domains. Also, 41\%/39.8\% of {\tt *.com}/{\tt *.net} ads-portal
                 domains are typos of well-known domains, also known as
                 typo-squatting domains. In addition, we use the
                 classifier along with DNS trace files to estimate how
                 often Internet users visit ads-portal domains. It turns
                 out that $ \approx 5 \% $ of the two-level {\tt *.com},
                 {\tt *.net}, {\tt *.org}, {\tt *.biz} and {\tt *.info}
                 Web domains in the traces are ads-portal domains and $
                 \approx 50 \% $ of these accessed ads-portal domains
                 are typos. These numbers show that ads-portal domains
                 and typo-squatting ads-portal domains are prevalent on
                 the Internet and successful in attracting many visits.
                 Our classifier represents a step towards better
                 categorizing Web documents. It can also be helpful
                 to search engine ranking algorithms, helpful in
                 identifying Web spam that redirects to ads-portal
                 domains, and can be used to discourage access to
                 typo-squatting ads-portal domains.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Ads-portal; advertisement syndication; data mining;
                 parked domain; parking service; Web characterization",
}
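
%%% The typo-squatting measurement above suggests a simple companion
%%% check (Python; illustrative, not the paper's classifier): flag a
%%% registrable label whose edit distance to some popular label is
%%% exactly 1. The POPULAR set is a stand-in for a real top-sites list.
%%%
%%%   def edit_distance(a, b):
%%%       prev = list(range(len(b) + 1))
%%%       for i, ca in enumerate(a, 1):
%%%           cur = [i]
%%%           for j, cb in enumerate(b, 1):
%%%               cur.append(min(prev[j] + 1, cur[-1] + 1,
%%%                              prev[j - 1] + (ca != cb)))
%%%           prev = cur
%%%       return prev[-1]
%%%
%%%   POPULAR = {"google", "amazon", "youtube"}   # stand-in list
%%%
%%%   def looks_like_typo(label):
%%%       return any(edit_distance(label, p) == 1 for p in POPULAR)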

@Article{Jurca:2010:RIB,
  author =       "Radu Jurca and Florent Garcin and Arjun Talwar and Boi
                 Faltings",
  title =        "Reporting incentives and biases in online review
                 forums",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1734200.1734202",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Online reviews have become increasingly popular as a
                 way to judge the quality of various products and
                 services. However, recent work demonstrates that the
                 absence of reporting incentives leads to a biased set
                 of reviews that may not reflect the true quality. In
                 this paper, we investigate underlying factors that
                 influence users when reporting feedback. In particular,
                 we study both reporting incentives and reporting biases
                 observed in a widely used review forum, the Tripadvisor
                 Web site. We consider three sources of information:
                 first, the numerical ratings left by the user for
                 different aspects of quality; second, the textual
                 comment accompanying a review; third, the patterns in
                 the time sequence of reports. We first show that groups
                 of users who discuss a certain feature at length are
                 more likely to agree in their ratings. Second, we show
                 that users are more motivated to give feedback when
                 they perceive a greater risk involved in a transaction.
                 Third, a user's rating partly reflects the difference
                 between true quality and prior expectation of quality,
                 as inferred from previous reviews. Finally, we observe
                 that, because of these biases, there are strong
                 differences between the mean and the median of the
                 review scores. We speculate that the median may be a
                 better way to summarize the ratings.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Online reviews; reputation mechanisms",
}
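
%%% The closing observation is easy to reproduce: with the skewed,
%%% J-shaped rating distributions typical of review forums, the mean
%%% and median of the very same reviews diverge. A toy Python example
%%% with invented ratings:
%%%
%%%   from statistics import mean, median
%%%
%%%   ratings = [5, 5, 5, 5, 4, 1, 1, 2, 5, 5]   # invented, bimodal
%%%   mean(ratings), median(ratings)             # 3.8 versus 5.0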

@Article{Vlachos:2010:ODB,
  author =       "Michail Vlachos and Suleyman S. Kozat and Philip S.
                 Yu",
  title =        "Optimal distance bounds for fast search on compressed
                 time-series query logs",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1734200.1734203",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Consider a database of time-series, where each
                 datapoint in the series records the total number of
                 users who asked for a specific query at an Internet
                 search engine. Storage and analysis of such logs can be
                 very beneficial for a search company from multiple
                 perspectives. First, from a data organization
                 perspective, because query Weblogs capture important
                 trends and statistics, they can help enhance and
                 optimize the search experience (keyword recommendation,
                 discovery of news events). Second, Weblog data can
                 provide an important polling mechanism for the
                 microeconomic aspects of a search engine, since they
                 can facilitate and promote the advertising facet of the
                 search engine (understand what users request and when
                 they request it).\par

                 Due to the sheer volume of time-series Weblogs,
                 manipulating the logs in compressed form is a pressing
                 necessity for fast data processing and compact
                 storage. Here, we explicate how to compute
                 the lower and upper distance bounds on the time-series
                 logs when working directly on their compressed form.
                 Optimal distance estimation means tighter bounds,
                 leading to better candidate selection/elimination and
                 ultimately faster search performance. Our derivation of
                 the optimal distance bounds is based on the careful
                 analysis of the problem using optimization principles.
                 The experimental evaluation suggests a clear
                 performance advantage of the proposed method, compared
                 to previous compression/search techniques. The
                 presented method results in a 10--30\% improvement on
                 distance estimations, which in turn leads to 25--80\%
                 improvement on the search performance.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
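
%%% The paper derives optimal bounds; the baseline such work improves
%%% on can be sketched in a few lines (Python/NumPy; not the authors'
%%% method). Keep the first k orthonormal-DFT coefficients of each
%%% series plus the energy of the discarded tail; the triangle
%%% inequality in the discarded subspace then gives valid lower and
%%% upper distance bounds.
%%%
%%%   import numpy as np
%%%
%%%   def compress(x, k):
%%%       X = np.fft.fft(x, norm="ortho")   # unitary: distances carry over
%%%       tail = np.sqrt(max(np.sum(np.abs(X)**2)
%%%                          - np.sum(np.abs(X[:k])**2), 0.0))
%%%       return X[:k], tail
%%%
%%%   def distance_bounds(ca, cb):
%%%       (A, ea), (B, eb) = ca, cb
%%%       d_kept = np.linalg.norm(A - B)
%%%       lower = np.sqrt(d_kept**2 + (ea - eb)**2)
%%%       upper = np.sqrt(d_kept**2 + (ea + eb)**2)
%%%       return lower, upper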

@Article{Fraternali:2010:ERI,
  author =       "Piero Fraternali and Sara Comai and Alessandro Bozzon
                 and Giovanni Toffetti Carughi",
  title =        "Engineering rich {Internet} applications with a
                 model-driven approach",
  journal =      j-TWEB,
  volume =       "4",
  number =       "2",
  pages =        "7:1--7:??",
  month =        apr,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1734200.1734204",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:32 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Rich Internet Applications (RIAs) have introduced
                 powerful novel functionalities into the Web
                 architecture, borrowed from client-server and desktop
                 applications. The resulting platforms allow designers
                 to improve the user's experience, by exploiting
                 client-side data and computation, bidirectional
                 client-server communication, synchronous and
                 asynchronous events, and rich interface widgets.
                 However, the rapid evolution of RIA technologies
                 challenges the Model-Driven Development methodologies
                 that have been successfully applied in the past decade
                 to traditional Web solutions. This paper illustrates an
                 evolutionary approach for incorporating a wealth of RIA
                 features into an existing Web engineering methodology
                 and notation. The experience demonstrates that it is
                 possible to model RIA application requirements at a
                 high-level using a platform-independent notation, and
                 generate the client-side and server-side code
                 automatically. The resulting approach is evaluated in
                 terms of expressive power, ease of use, and
                 implementability.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "information interfaces and presentation; information
                 storage and retrieval; model-driven development; Rich
                 Internet applications; Web engineering",
}

@Article{Xiao:2010:LSS,
  author =       "Xiangye Xiao and Qiong Luo and Zhisheng Li and Xing
                 Xie and Wei-Ying Ma",
  title =        "A large-scale study on map search logs",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1806916.1806917",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Map search engines, such as Google Maps, Yahoo! Maps,
                 and Microsoft Live Maps, allow users to explicitly
                 specify a target geographic location, either in
                 keywords or on the map, and to search businesses,
                 people, and other information of that location. In this
                 article, we report a first study on a million-entry map
                 search log. We identify three key attributes of a map
                 search record --- the keyword query, the target
                 location, and the user location --- and examine the
                 characteristics of these three dimensions separately as
                 well as the associations between them. Comparing our
                 results with those previously reported on logs of
                 general search engines and mobile search engines,
                 including those for geographic queries, we discover the
                 following unique features of map search: (1) People use
                 longer queries and modify queries more frequently in a
                 session than in general search and mobile search, and
                 view fewer result pages per query than in general
                 search; (2) The popular query topics in map
                 search are different from those in general search and
                 mobile search; (3) The target locations in a session
                 change within 50 kilometers for almost 80\% of the
                 sessions; (4) Queries, search target locations and user
                 locations (both at the city level) all follow a
                 power-law distribution; (5) One third of queries are issued
                 for target locations within 50 kilometers from the user
                 locations; (6) The distribution of a query over target
                 locations appears to follow the geographic location of
                 the queried entity.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "local search; log analysis; Map search; query
                 categorization; search interface; user behavior",
}
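
%%% Finding (4) can be checked on any such log with a quick log-log fit
%%% (Python/NumPy; illustrative only, since rigorous power-law testing
%%% needs more care than a least-squares line):
%%%
%%%   import numpy as np
%%%
%%%   def rank_frequency_slope(counts):
%%%       """Slope of the log-log rank-frequency plot; near -1 is
%%%       Zipf-like."""
%%%       freqs = np.sort(np.asarray(counts, dtype=float))[::-1]
%%%       ranks = np.arange(1, len(freqs) + 1)
%%%       slope, _ = np.polyfit(np.log(ranks), np.log(freqs), 1)
%%%       return slope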

@Article{Malak:2010:MWQ,
  author =       "Ghazwa Malak and Houari Sahraoui and Linda Badri and
                 Mourad Badri",
  title =        "Modeling {Web} quality using a probabilistic approach:
                 {An} empirical validation",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "9:1--9:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1806916.1806918",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Web-based applications are software systems that
                 continuously evolve to meet users' needs and to adapt
                 to new technologies. Assuring their quality is then a
                 difficult, but essential task. In fact, a large number
                 of factors can affect their quality. Considering these
                 factors and their interaction involves managing
                 uncertainty and subjectivity inherent to this kind of
                 applications. In this article, we present a
                 probabilistic approach for building Web quality models
                 and the associated assessment method. The proposed
                 approach is based on Bayesian Networks. A model is
                 built following a four-step process consisting of
                 collecting quality characteristics, refining them,
                 building a model structure, and deriving the model
                 parameters.\par

                 The feasibility of the approach is illustrated on the
                 important quality characteristic of {\em Navigability
                 design}. To validate the produced model, we conducted
                 an experimental study with 20 subjects and 40 Web
                 pages. The results obtained show that the scores
                 produced by the model are strongly correlated with
                 navigability as perceived and experienced by the
                 users.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Bayesian networks; Navigability design; probabilistic
                 approach; quality evaluation; Web applications",
}

@Article{Poblete:2010:PPQ,
  author =       "Barbara Poblete and Myra Spiliopoulou and Ricardo
                 Baeza-Yates",
  title =        "Privacy-preserving query log mining for business
                 confidentiality protection",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "10:1--10:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1806916.1806919",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We introduce the concern of confidentiality protection
                 of business information for the publication of search
                 engine query logs and derived data. We study business
                 confidentiality as the protection of the nonpublic data
                 of institutions, such as companies, and of people in the
                 public eye. In particular, we relate this concern to
                 the involuntary exposure of confidential Web site
                 information, and we transfer this problem into the
                 field of privacy-preserving data mining. We
                 characterize the possible adversaries interested in
                 disclosing Web site confidential data and the attack
                 strategies that they could use. These attacks are based
                 on different vulnerabilities found in query logs, for
                 which we present several anonymization heuristics to
                 prevent them. We perform an experimental evaluation to
                 estimate the remaining utility of the log after the
                 application of our anonymization techniques. Our
                 experimental results show that a query log can be
                 anonymized against these specific attacks while
                 retaining a significant volume of useful data.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Privacy preservation; queries; query log publication;
                 Web sites",
}
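
%%% In the spirit of the heuristics the abstract describes (the
%%% concrete rule here is an assumption for illustration, not one of
%%% the paper's): publish a query only if at least k distinct users
%%% issued it, so that rare queries likely to expose a single site's
%%% confidential structure or traffic are suppressed.
%%%
%%%   from collections import defaultdict
%%%
%%%   def k_anonymize_log(records, k=5):
%%%       """records: iterable of (user_id, query) pairs. Returns the
%%%       set of queries safe to publish under the threshold."""
%%%       users = defaultdict(set)
%%%       for uid, q in records:
%%%           users[q].add(uid)
%%%       return {q for q, us in users.items() if len(us) >= k}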

@Article{Consens:2010:EXW,
  author =       "Mariano P. Consens and Ren{\'e}e J. Miller and Flavio
                 Rizzolo and Alejandro A. Vaisman",
  title =        "Exploring {XML} {Web} collections with {DescribeX}",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1806916.1806920",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "As Web applications mature and evolve, the nature of
                 the semistructured data that drives these applications
                 also changes. An important trend is the need for
                 increased flexibility in the structure of Web
                 documents. Hence, applications cannot rely solely on
                 schemas to provide the complex knowledge needed to
                 visualize, use, query and manage documents. Even when
                 XML Web documents are valid with regard to a schema,
                 the actual structure of such documents may exhibit
                 significant variations across collections for several
                 reasons: the schema may be very lax (e.g., RSS feeds),
                 the schema may be large and different subsets of it may
                 be used in different documents (e.g., industry
                 standards like UBL), or open content models may allow
                 arbitrary schemas to be mixed (e.g., RSS extensions
                 like those used for podcasting). For these reasons,
                 many applications that incorporate XPath queries to
                 process a large Web document collection require an
                 understanding of the actual structure present in the
                 collection, and not just the schema.\par

                 To support modern Web applications, we introduce
                 DescribeX, a powerful framework that is capable of
                 describing complex XML summaries of Web collections.
                 DescribeX supports the construction of heterogeneous
                 summaries that can be declaratively defined and refined
                 by means of axis path regular expressions (AxPREs).
                 AxPREs provide the flexibility necessary for
                 declaratively defining complex mappings between
                 instance nodes (in the documents) and summary nodes.
                 These mappings are capable of expressing order and
                 cardinality, among other properties, which can
                 significantly help in the understanding of the
                 structure of large collections of XML documents and
                 enhance the performance of Web applications over these
                 collections. DescribeX captures most summary proposals
                 in the literature by providing (for the first time) a
                 common declarative definition for them. Experimental
                 results demonstrate the scalability of DescribeX
                 summary operations (summary creation, as well as
                 refinement and stabilization, two key enablers for
                 tailoring summaries) on multi-gigabyte Web
                 collections.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Semistructured data; structural summaries; XML;
                 XPath",
}
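
%%% AxPRE-defined summaries are far more general than this, but the
%%% simplest member of the family (partition elements by their incoming
%%% label path and count the extents) fits in a few lines of Python and
%%% conveys what a structural summary records:
%%%
%%%   import xml.etree.ElementTree as ET
%%%   from collections import Counter
%%%
%%%   def label_path_summary(xml_text):
%%%       extents = Counter()
%%%       def walk(elem, path):
%%%           path = path + "/" + elem.tag
%%%           extents[path] += 1
%%%           for child in elem:
%%%               walk(child, path)
%%%       walk(ET.fromstring(xml_text), "")
%%%       return extents
%%%
%%%   label_path_summary("<rss><channel><item/><item/></channel></rss>")
%%%   # Counter({'/rss/channel/item': 2, '/rss': 1, '/rss/channel': 1})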

@Article{Adams:2010:DLS,
  author =       "Brett Adams and Dinh Phung and Svetha Venkatesh",
  title =        "Discovery of latent subcommunities in a blog's
                 readership",
  journal =      j-TWEB,
  volume =       "4",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1806916.1806921",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Sat Aug 14 15:42:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The blogosphere has grown to be a mainstream forum of
                 social interaction as well as a commercially attractive
                 source of information and influence. Tools are needed
                 to better understand how the communities that form
                 around individual blogs are constituted, in order to facilitate
                 new personal, socially-focused browsing paradigms, and
                 understand how blog content is consumed, which is of
                 interest to blog authors, big media, and search. We
                 present a novel approach to blog subcommunity
                 characterization by modeling individual blog readers
                 using mixtures of Ngram Topic over Time (NTOT), an
                 extension of the LDA family that jointly models phrases
                 and time, and by clustering with a number of similarity
                 measures using Affinity Propagation. We experiment with
                 two datasets: a small set of blogs whose authors
                 provide feedback, and a set of popular, highly
                 commented blogs, which provide indicators of algorithm
                 scalability and interpretability without prior
                 knowledge of a given blog. The results offer useful
                 insight to the blog authors about their commenting
                 community, and are observed to offer an integrated
                 perspective on the topics of discussion and members
                 engaged in those discussions for unfamiliar blogs. Our
                 approach also holds promise as a component of solutions
                 to related problems, such as online entity resolution
                 and role discovery.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "affinity propagation; Blog; topic models; Web
                 communities",
}
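
%%% The NTOT model is beyond a short sketch, but the clustering stage
%%% named in the abstract is standard: Affinity Propagation over a
%%% precomputed reader-to-reader similarity matrix (the similarities
%%% below are invented).
%%%
%%%   import numpy as np
%%%   from sklearn.cluster import AffinityPropagation
%%%
%%%   S = np.array([[1.0, 0.8, 0.1],
%%%                 [0.8, 1.0, 0.2],
%%%                 [0.1, 0.2, 1.0]])   # toy similarities, three readers
%%%
%%%   ap = AffinityPropagation(affinity="precomputed", random_state=0).fit(S)
%%%   ap.labels_                        # readers 0 and 1 should co-cluster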

@Article{Kiciman:2010:APR,
  author =       "Emre Kiciman and Benjamin Livshits",
  title =        "{AjaxScope}: {A} Platform for Remotely Monitoring the
                 Client-Side Behavior of {Web 2.0} Applications",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841910",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bex:2010:LDR,
  author =       "Geert Jan Bex and Wouter Gelade and Frank Neven and
                 Stijn Vansummeren",
  title =        "Learning Deterministic Regular Expressions for the
                 Inference of Schemas from {XML} Data",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841911",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bailey:2010:MHQ,
  author =       "Peter Bailey and Ryen W. White and Han Liu and
                 Giridhar Kumaran",
  title =        "Mining Historic Query Trails to Label Long and Rare
                 Search Engine Queries",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841912",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Claude:2010:FCW,
  author =       "Francisco Claude and Gonzalo Navarro",
  title =        "Fast and Compact {Web} Graph Representations",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841913",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Swaminathan:2010:RRM,
  author =       "Ashwin Swaminathan and Renan G. Cattelan and Ydo
                 Wexler and Cherian V. Mathew and Darko Kirovski",
  title =        "Relating Reputation and Money in Online Markets",
  journal =      j-TWEB,
  volume =       "4",
  number =       "4",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1841909.1841914",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Nov 23 12:48:27 MST 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Riedl:2011:ISI,
  author =       "John Riedl and Barry Smyth",
  title =        "Introduction to special issue on recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921592",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cacheda:2011:CCF,
  author =       "Fidel Cacheda and V{\'\i}ctor Carneiro and Diego
                 Fern{\'a}ndez and Vreixo Formoso",
  title =        "Comparison of collaborative filtering algorithms:
                 Limitations of current techniques and proposals for
                 scalable, high-performance recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921593",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Umyarov:2011:UEA,
  author =       "Akhmed Umyarov and Alexander Tuzhilin",
  title =        "Using external aggregate ratings for improving
                 individual recommendations",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921594",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Song:2011:ATR,
  author =       "Yang Song and Lu Zhang and C. Lee Giles",
  title =        "Automatic tag recommendation algorithms for social
                 recommender systems",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921595",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zheng:2011:RFL,
  author =       "Yu Zheng and Lizhu Zhang and Zhengxin Ma and Xing Xie
                 and Wei-Ying Ma",
  title =        "Recommending friends and locations based on individual
                 location history",
  journal =      j-TWEB,
  volume =       "5",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1921591.1921596",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Mon Mar 28 11:56:06 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Wu:2011:TDQ,
  author =       "Mingfang Wu and Falk Scholer and Andrew Turpin",
  title =        "Topic Distillation with Query-Dependent Link
                 Connections and Page Characteristics",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961660",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Hurley:2011:HBP,
  author =       "John Hurley and Emi Garcia-Palacios and Sakir Sezer",
  title =        "Host-Based {P2P} Flow Identification and Use in
                 Real-Time",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961661",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Mitra:2011:CWB,
  author =       "Siddharth Mitra and Mayank Agrawal and Amit Yadav and
                 Niklas Carlsson and Derek Eager and Anirban Mahanti",
  title =        "Characterizing {Web}-Based Video Sharing Workloads",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961662",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Ozcan:2011:CAS,
  author =       "Rifat Ozcan and Ismail Sengor Altingovde and
                 {\"O}zg{\"u}r Ulusoy",
  title =        "Cost-Aware Strategies for Query Result Caching in
                 {Web} Search Engines",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961663",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Valderas:2011:SRS,
  author =       "Pedro Valderas and Vicente Pelechano",
  title =        "A Survey of Requirements Specification in Model-Driven
                 Development of {Web} Applications",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961664",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Grier:2011:DIO,
  author =       "Chris Grier and Shuo Tang and Samuel T. King",
  title =        "Designing and Implementing the {OP} and {OP2} {Web}
                 Browsers",
  journal =      j-TWEB,
  volume =       "5",
  number =       "2",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1961659.1961665",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Tue Jun 7 18:44:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Koutsonikola:2011:CDL,
  author =       "Vassiliki Koutsonikola and Athena Vakali",
  title =        "A Clustering-Driven {LDAP} Framework",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993054",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Paci:2011:AAC,
  author =       "Federica Paci and Massimo Mecella and Mourad Ouzzani
                 and Elisa Bertino",
  title =        "{ACConv} -- An Access Control Model for Conversational
                 {Web} Services",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993055",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zeginis:2011:CDR,
  author =       "Dimitris Zeginis and Yannis Tzitzikas and Vassilis
                 Christophides",
  title =        "On Computing Deltas of {RDF/S} Knowledge Bases",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993056",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Baykan:2011:CSF,
  author =       "Eda Baykan and Monika Henzinger and Ludmila Marian and
                 Ingmar Weber",
  title =        "A Comprehensive Study of Features and Algorithms for
                 {URL}-Based Topic Classification",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993057",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Tuchinda:2011:BMD,
  author =       "Rattapoom Tuchinda and Craig A. Knoblock and Pedro
                 Szekely",
  title =        "Building Mashups by Demonstration",
  journal =      j-TWEB,
  volume =       "5",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1993053.1993058",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  bibdate =      "Thu Aug 18 13:57:29 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Alzoubi:2011:PAA,
  author =       "Hussein A. Alzoubi and Seungjoon Lee and Michael
                 Rabinovich and Oliver Spatscheck and Jacobus {Van Der
                 Merwe}",
  title =        "A Practical Architecture for an {Anycast CDN}",
  journal =      j-TWEB,
  volume =       "5",
  number =       "4",
  pages =        "17:1--17:??",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2019643.2019644",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:40 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "IP Anycast has many attractive features for any
                 service that involves the replication of multiple
                 instances across the Internet. IP Anycast allows
                 multiple instances of the same service to be
                 `naturally' discovered, and requests for this service
                 to be delivered to the closest instance. However, while
                 briefly considered as an enabler for content delivery
                 networks (CDNs) when they first emerged, IP Anycast was
                 deemed infeasible in that environment. The main reasons
                 for this decision were the lack of load awareness of IP
                 Anycast and unwanted side effects of Internet routing
                 changes on the IP Anycast mechanism. In this article we
                 re-evaluate IP Anycast for CDNs by proposing a
                 load-aware IP Anycast CDN architecture. Our
                 architecture is prompted by recent developments in
                 route control technology, as well as better
                 understanding of the behavior of IP Anycast in
                 operational settings. Our architecture makes use of
                 route control mechanisms to take server and network
                 load into account to realize load-aware Anycast. We
                 show that the resulting redirection requirements can be
                 formulated as a Generalized Assignment Problem and
                 present practical algorithms that address these
                 requirements while at the same time limiting connection
                 disruptions that plague regular IP Anycast. We evaluate
                 our algorithms through trace-based simulation using
                 traces obtained from a production CDN network.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bar-Yossef:2011:ESE,
  author =       "Ziv Bar-Yossef and Maxim Gurevich",
  title =        "Efficient Search Engine Measurements",
  journal =      j-TWEB,
  volume =       "5",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2019643.2019645",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:40 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We address the problem of externally measuring
                 aggregate functions over documents indexed by search
                 engines, like corpus size, index freshness, and density
                 of duplicates in the corpus. State-of-the-art
                 estimators for such quantities [Bar-Yossef and Gurevich
                 2008b; Broder et al. 2006] are biased due to inaccurate
                 approximation of the so-called `document degrees'. In
                 addition, the estimators in Bar-Yossef and Gurevich
                 [2008b] are quite costly, due to their reliance on
                 rejection sampling. We present new estimators that are
                 able to overcome the bias introduced by approximate
                 degrees. Our estimators are based on a careful
                 implementation of an approximate importance sampling
                 procedure. Comprehensive theoretical and empirical
                 analysis of the estimators demonstrates that they have
                 essentially no bias even in situations where document
                 degrees are poorly approximated. By avoiding the costly
                 rejection sampling approach, our new importance
                 sampling estimators are significantly more efficient
                 than the estimators proposed in Bar-Yossef and Gurevich
                 [2008b]. Furthermore, building on an idea from Broder
                 et al. [2006], we discuss Rao-Blackwellization as a
                 generic method for reducing variance in search engine
                 estimators. We show that Rao-Blackwellizing our
                 estimators results in performance improvements, without
                 compromising accuracy.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Gill:2011:COU,
  author =       "Phillipa Gill and Martin Arlitt and Niklas Carlsson
                 and Anirban Mahanti and Carey Williamson",
  title =        "Characterizing Organizational Use of {Web}-Based
                 Services: Methodology, Challenges, Observations, and
                 Insights",
  journal =      j-TWEB,
  volume =       "5",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2019643.2019646",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:40 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Today's Web provides many different functionalities,
                 including communication, entertainment, social
                 networking, and information retrieval. In this article,
                 we analyze traces of HTTP activity from a large
                 enterprise and from a large university to identify and
                 characterize Web-based service usage. Our work provides
                 an initial methodology for the analysis of Web-based
                 services. While it is nontrivial to identify the
                 classes, instances, and providers for each transaction,
                 our results show that most of the traffic comes from a
                 small subset of providers, which can be classified
                 manually. Furthermore, we assess both qualitatively and
                 quantitatively how the Web has evolved over the past
                 decade, and discuss the implications of these
                 changes.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Singla:2011:CBC,
  author =       "Adish Singla and Ingmar Weber",
  title =        "Camera Brand Congruence and Camera Model Propagation
                 in the {Flickr} Social Graph",
  journal =      j-TWEB,
  volume =       "5",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2019643.2019647",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:40 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Given that my friends on Flickr use cameras of brand
                 X, am I more likely to also use a camera of brand X?
                 Given that one of these friends changes her brand, am I
                 likely to do the same? Do new camera models pop up
                 uniformly in the friendship graph? Or do early adopters
                 then `convert' their friends? Which factors influence
                 the conversion probability of a user? These are the
                 kinds of questions addressed in this work. Direct
                 applications involve personalized advertising in social
                 networks. For our study, we crawled a complete
                 connected component of the Flickr friendship graph with
                 a total of 67M edges and 3.9M users. 1.2M of these
                 users had at least one public photograph with valid
                 model metadata, which allowed us to assign camera
                 brands and models to users and time slots. Similarly,
                 we used, where provided in a user's profile,
                 information about a user's geographic location and the
                 groups joined on Flickr. Concerning brand congruence,
                 our main findings are the following. First, a pair of
                 friends on Flickr has a higher probability of being
                 congruent, that is, using the same brand, compared to
                 two random users (27\% vs. 19\%). Second, the degree of
                 congruence goes up for pairs of friends (i) in the same
                 country (29\%), (ii) who both only have very few
                 friends (30\%), and (iii) with a very high cliqueness
                 (38\%). Third, given that a user changes her camera
                 model between March-May 2007 and March-May 2008, high
                 cliqueness friends are more likely than random users to
                 do the same (54\% vs. 48\%). Fourth, users using
                 high-end cameras are far more loyal to their brand than
                 users using point-and-shoot cameras, with a probability
                 of staying with the same brand of 60\% vs. 33\%, given
                 that a new camera is bought. Fifth, these `expert'
                 users' brand congruence reaches 66\% for high
                 cliqueness friends. All these differences are
                 statistically significant at 1\%. As for the
                 propagation of new models in the friendship graph, we
                 observe the following. First, the growth of connected
                 components of users converted to a particular, new
                 camera model differs distinctly from random growth.
                 Second, the decline of dissemination of a particular
                 model is close to random decline. This illustrates that
                 users influence their friends to change to a particular
                 new model, rather than from a particular old model.
                 Third, having many converted friends increases the
                 probability that the user converts herself. Here
                 differences between friends from the same or from
                 different countries are more pronounced for
                 point-and-shoot than for digital single-lens reflex
                 users. Fourth, there was again a distinct difference
                 between arbitrary friends and high cliqueness friends
                 in terms of prediction quality for conversion.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Curlango-Rosas:2011:SSA,
  author =       "Cecilia Curlango-Rosas and Gregorio A. Ponce and
                 Gabriel A. Lopez-Morteo",
  title =        "A Specialized Search Assistant for Learning Objects",
  journal =      j-TWEB,
  volume =       "5",
  number =       "4",
  pages =        "21:1--21:??",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2019643.2019648",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:40 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The Web holds a great quantity of material that can be
                 used to enhance classroom instruction. However, it is
                 not easy to retrieve this material with the search
                 engines currently available. This study produced a
                 specialized search assistant based on Google that
                 significantly increases the number of instances in
                 which teachers find the desired learning objects as
                 compared to using this popular public search engine
                 directly. Success in finding learning objects by study
                 participants went from 80\% using Google alone to 96\%
                 when using our search assistant in one scenario and, in
                 another scenario, from a 40\% success rate with Google
                 alone to 66\% with our assistant. This specialized
                 search assistant implements features such as bilingual
                 search and term suggestion which were requested by
                 teacher participants to help improve their searches.
                 Study participants evaluated the specialized search
                 assistant and found it significantly easier to use and
                 more useful than the popular search engine for the
                 purpose of finding learning objects.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zhu:2012:CLS,
  author =       "Guangyu Zhu and Gilad Mishne",
  title =        "{ClickRank}: Learning Session-Context Models to Enrich
                 {Web} Search Ranking",
  journal =      j-TWEB,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2109205.2109206",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:41 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "User browsing information, particularly
                 non-search-related activity, reveals important
                 contextual information on the preferences and intents
                 of Web users. In this article, we demonstrate the
                 importance of mining general Web user behavior data to
                 improve ranking and other aspects of the Web-search
                 experience, with an emphasis on analyzing individual
                 user sessions for
                 creating aggregate models. In this context, we
                 introduce ClickRank, an efficient, scalable algorithm
                 for estimating Webpage and Website importance from
                 general Web user-behavior data. We lay out the
                 theoretical foundation of ClickRank based on an
                 intentional surfer model and discuss its properties. We
                 quantitatively evaluate its effectiveness regarding the
                 problem of Web-search ranking, showing that it
                 contributes significantly to retrieval performance as a
                 novel Web-search feature. We demonstrate that the
                 results produced by ClickRank for Web-search ranking
                 are highly competitive with those produced by other
                 approaches, yet achieved at better scalability and
                 substantially lower computational costs. Finally, we
                 discuss novel applications of ClickRank in providing
                 enriched user Web-search experience, highlighting the
                 usefulness of our approach for nonranking tasks.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Liu:2012:IWS,
  author =       "Yiqun Liu and Fei Chen and Weize Kong and Huijia Yu
                 and Min Zhang and Shaoping Ma and Liyun Ru",
  title =        "Identifying {Web} Spam with the Wisdom of the Crowds",
  journal =      j-TWEB,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2109205.2109207",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:41 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Combating Web spam has become one of the top
                 challenges for Web search engines. State-of-the-art
                 spam-detection techniques are usually designed for
                 specific, known types of Web spam and are incapable of
                 dealing with newly appearing spam types efficiently.
                 With user-behavior analyses from Web access logs, a
                 spam page-detection algorithm is proposed based on a
                 learning scheme. The main contributions are the
                 following. (1) User-visiting patterns of spam pages are
                 studied, and a number of user-behavior features are
                 proposed for separating Web spam pages from ordinary
                 pages. (2) A novel spam-detection framework is proposed
                 that can detect various kinds of Web spam, including
                 newly appearing ones, with the help of the
                 user-behavior analysis. Experiments on large-scale
                 practical Web access log data show the effectiveness of
                 the proposed features and the detection framework.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Mesbah:2012:CAB,
  author =       "Ali Mesbah and Arie van Deursen and Stefan Lenselink",
  title =        "Crawling {Ajax}-Based {Web} Applications through
                 Dynamic Analysis of User Interface State Changes",
  journal =      j-TWEB,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2109205.2109208",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:41 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Using JavaScript and dynamic DOM manipulation on the
                 client side of Web applications is becoming a
                 widespread approach for achieving rich interactivity
                 and responsiveness in modern Web applications. At the
                 same time, such techniques---collectively known as
                 Ajax---shatter the concept of webpages with unique
                 URLs, on which traditional Web crawlers are based. This
                 article describes a novel technique for crawling
                 Ajax-based applications through automatic dynamic
                 analysis of user-interface-state changes in Web
                 browsers. Our algorithm scans the DOM tree, spots
                 candidate elements that are capable of changing the
                 state, fires events on those candidate elements, and
                 incrementally infers a state machine that models the
                 various navigational paths and states within an Ajax
                 application. This inferred model can be used in program
                 comprehension and in analysis and testing of dynamic
                 Web states, for instance, or for generating a static
                 version of the application. In this article, we discuss
                 our sequential and concurrent Ajax crawling algorithms.
                 We present our open source tool called Crawljax, which
                 implements the concepts and algorithms discussed in
                 this article. Additionally, we report a number of
                 empirical studies in which we apply our approach to a
                 number of open-source and industrial Web applications
                 and elaborate on the obtained results.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Lauw:2012:QLO,
  author =       "Hady W. Lauw and Ee-Peng Lim and Ke Wang",
  title =        "Quality and Leniency in Online Collaborative Rating
                 Systems",
  journal =      j-TWEB,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2109205.2109209",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Fri Mar 16 12:37:41 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The emerging trend of social information processing
                 has resulted in Web users' increased reliance on
                 user-generated content contributed by others for
                 information searching and decision making. Rating
                 scores, a form of user-generated content contributed by
                 reviewers in online rating systems, allow users to
                 leverage others' opinions in the evaluation of objects.
                 In this article, we focus on the problem of summarizing
                 the rating scores given to an object into an overall
                 score that reflects the object's quality. We observe
                 that the existing approaches for summarizing scores
                 largely ignore the effect of reviewers exercising
                 different standards in assigning scores. Instead of
                 treating all reviewers as equals, our approach models
                 the leniency of reviewers, which refers to the tendency
                 of a reviewer to assign higher scores than other
                 coreviewers. Our approach is underlined by two
                 insights: (1) The leniency of a reviewer depends not
                 only on how the reviewer rates objects, but also on how
                 other reviewers rate those objects and (2) The leniency
                 of a reviewer and the quality of rated objects are
                 mutually dependent. We develop the leniency-aware
                 quality, or LQ model, which solves leniency and quality
                 simultaneously. We introduce both an exact and a ranked
                 solution to the model. Experiments on real-life and
                 synthetic datasets show that LQ is more effective than
                 comparable approaches. LQ is also shown to perform
                 consistently better under different parameter
                 settings.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Ashman:2012:E,
  author =       "Helen Ashman and Arun Iyengar and Marc Najork",
  title =        "Editorial",
  journal =      j-TWEB,
  volume =       "6",
  number =       "2",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180861.2180862",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:48 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{DeCapitaniDiVimercati:2012:ITM,
  author =       "Sabrina {De Capitani Di Vimercati} and Sara Foresti
                 and Sushil Jajodia and Stefano Paraboschi and Giuseppe
                 Psaila and Pierangela Samarati",
  title =        "Integrating trust management and access control in
                 data-intensive {Web} applications",
  journal =      j-TWEB,
  volume =       "6",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180861.2180863",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:48 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The widespread diffusion of Web-based services
                 provided by public and private organizations emphasizes
                 the need for a flexible solution for protecting the
                 information accessible through Web applications. A
                 promising approach is represented by credential-based
                 access control and trust management. However, although
                 much research has been done and several proposals
                 exist, a clear obstacle to the realization of their
                 benefits in data-intensive Web applications is
                 represented by the lack of adequate support in the
                 DBMSs. As a matter of fact, DBMSs are often responsible
                 for the management of most of the information that is
                 accessed using a Web browser or a Web service
                 invocation. In this article, we aim at eliminating this
                 gap, and present an approach integrating trust
                 management with the access control of the DBMS. We
                 propose a trust model with a SQL syntax and illustrate
                 an algorithm for the efficient verification of a
                 delegation path for certificates. Our solution nicely
                 complements current trust management proposals allowing
                 the efficient realization of the services of an
                 advanced trust management model within current
                 relational DBMSs. An important benefit of our approach
                 lies in its potential for a robust end-to-end design of
                 security for personal data in Web scenarios, where
                 vulnerabilities of Web applications cannot be used to
                 violate the protection of the data residing on the
                 database server. We also illustrate the implementation
                 of our approach within an open-source DBMS discussing
                 design choices and performance impact.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Alrifai:2012:HAE,
  author =       "Mohammad Alrifai and Thomas Risse and Wolfgang Nejdl",
  title =        "A hybrid approach for efficient {Web} service
                 composition with end-to-end {QoS} constraints",
  journal =      j-TWEB,
  volume =       "6",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180861.2180864",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:48 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Dynamic selection of Web services at runtime is
                 important for building flexible and loosely-coupled
                 service-oriented applications. An abstract description
                 of the required services is provided at design-time,
                 and matching service offers are located at runtime.
                 With the growing number of Web services that provide
                 the same functionality but differ in quality parameters
                 (e.g., availability, response time), a decision needs
                 to be made on which services should be selected such
                 that the user's end-to-end QoS requirements are
                 satisfied. Although very efficient, the local selection
                 strategy falls short in handling global QoS
                 requirements. Solutions based on global optimization,
                 on the other hand, can handle global constraints, but
                 their poor performance renders them inappropriate for
                 applications with dynamic and real-time requirements. In
                 this article we address this problem and propose a
                 hybrid solution that combines global optimization with
                 local selection techniques to benefit from the
                 advantages of both worlds. The proposed solution
                 consists of two steps: first, we use mixed integer
                 programming (MIP) to find the optimal decomposition of
                 global QoS constraints into local constraints. Second,
                 we use distributed local selection to find the best Web
                 services that satisfy these local constraints. The
                 results of experimental evaluation indicate that our
                 approach significantly outperforms existing solutions
                 in terms of computation time while achieving
                 close-to-optimal results.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Desnoyers:2012:MAM,
  author =       "Peter Desnoyers and Timothy Wood and Prashant Shenoy
                 and Rahul Singh and Sangameshwar Patil and Harrick
                 Vin",
  title =        "{Modellus}: Automated modeling of complex {Internet}
                 data center applications",
  journal =      j-TWEB,
  volume =       "6",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180861.2180865",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:48 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The rising complexity of distributed server
                 applications in Internet data centers has made the
                 tasks of modeling and analyzing their behavior
                 increasingly difficult. This article presents Modellus,
                 a novel system for automated modeling of complex
                 web-based data center applications using methods from
                 queuing theory, data mining, and machine learning.
                 Modellus uses queuing theory and statistical methods to
                 automatically derive models to predict the resource
                 usage of an application and the workload it triggers;
                 these models can be composed to capture multiple
                 dependencies between interacting applications. Model
                 accuracy is maintained by fast, distributed testing,
                 automated relearning of models when they change, and
                 methods to bound prediction errors in composite models.
                 We have implemented a prototype of Modellus, deployed
                 it on a data center testbed, and evaluated its efficacy
                 for modeling and analysis of several distributed
                 multitier web applications. Our results show that this
                 feature-based modeling technique is able to make
                 predictions across several data center tiers, and
                 maintain predictive accuracy (typically 95\% or better)
                 in the face of significant shifts in workload
                 composition; we also demonstrate practical applications
                 of the Modellus system to prediction and provisioning
                 of real-world data center applications.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Aiello:2012:FPH,
  author =       "Luca Maria Aiello and Alain Barrat and Rossano
                 Schifanella and Ciro Cattuto and Benjamin Markines and
                 Filippo Menczer",
  title =        "Friendship prediction and homophily in social media",
  journal =      j-TWEB,
  volume =       "6",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180861.2180866",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:48 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Social media have attracted considerable attention
                 because their open-ended nature allows users to create
                 lightweight semantic scaffolding to organize and share
                 content. To date, the interplay of the social and
                 topical components of social media has been only
                 partially explored. Here, we study the presence of
                 homophily in three systems that combine tagging social
                 media with online social networks. We find a
                 substantial level of topical similarity among users who
                 are close to each other in the social network. We
                 introduce a null model that preserves user activity
                 while removing local correlations, allowing us to
                 disentangle the actual local similarity between users
                 from statistical effects due to the assortative mixing
                 of user activity and centrality in the social network.
                 This analysis suggests that users with similar
                 interests are more likely to be friends, and therefore
                 topical similarity measures among users based solely on
                 their annotation metadata should be predictive of
                 social links. We test this hypothesis on several
                 datasets, confirming that social networks constructed
                 from topical similarity capture actual friendship
                 accurately. When combined with topological features,
                 topical similarity achieves a link prediction accuracy
                 of about 92\%.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Comai:2012:MDM,
  author =       "Sara Comai and Davide Mazza",
  title =        "A model-driven methodology to the content layout
                 problem in {Web} applications",
  journal =      j-TWEB,
  volume =       "6",
  number =       "3",
  pages =        "10:1--10:38",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2344416.2344417",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:49 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/texbook3.bib;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "This article presents a model-driven approach for the
                 design of the layout in a complex Web application,
                 where large amounts of data are accessed. The aim of
                 this work is to reduce, as much as possible, repetitive
                 tasks and to factor out common aspects into different
                 kinds of rules that can be reused across different
                 applications. In particular, exploiting the conceptual
                 elements of the typical models used for the design of a
                 Web application, it defines presentation and layout
                 rules at different levels of abstraction and
                 granularity. A procedure for the automatic layout of
                 the content of a page is proposed and evaluated, and
                 the layout of advanced Web applications is discussed.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
  keywords =     "Automatic contents layout; graphical visualization and
                 rendering; Web applications design",
}

@Article{Merhav:2012:EIN,
  author =       "Yuval Merhav and Filipe Mesquita and Denilson Barbosa
                 and Wai Gen Yee and Ophir Frieder",
  title =        "Extracting information networks from the blogosphere",
  journal =      j-TWEB,
  volume =       "6",
  number =       "3",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2344416.2344418",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:49 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We study the problem of automatically extracting
                 information networks formed by recognizable entities as
                 well as relations among them from social media sites.
                 Our approach consists of using state-of-the-art natural
                 language processing tools to identify entities and
                 extract sentences that relate such entities, followed
                 by using text-clustering algorithms to identify the
                 relations within the information network. We propose a
                 new term-weighting scheme that significantly improves
                 on the state-of-the-art in the task of relation
                 extraction, both when used in conjunction with the
                 standard tf $ \cdot $ idf scheme and also when used as
                 a pruning filter. We describe an effective method for
                 identifying benchmarks for open information extraction
                 that relies on a curated online database that is
                 comparable to the hand-crafted evaluation datasets in
                 the literature. From this benchmark, we derive a much
                 larger dataset which mimics realistic conditions for
                 the task of open information extraction. We report on
                 extensive experiments on both datasets, which not only
                 shed light on the accuracy levels achieved by
                 state-of-the-art open information extraction tools, but
                 also on how to tune such tools for better results.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Miliaraki:2012:FDS,
  author =       "Iris Miliaraki and Manolis Koubarakis",
  title =        "{FoXtrot}: Distributed structural and value {XML}
                 filtering",
  journal =      j-TWEB,
  volume =       "6",
  number =       "3",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2344416.2344419",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:49 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Publish/subscribe systems have emerged in recent years
                 as a promising paradigm for offering various popular
                 notification services. In this context, many XML
                 filtering systems have been proposed to efficiently
                 identify XML data that matches user interests expressed
                 as queries in an XML query language like XPath.
                 However, in order to offer XML filtering functionality
                 on an Internet-scale, we need to deploy such a service
                 in a distributed environment, avoiding bottlenecks that
                 can deteriorate performance. In this work, we design
                 and implement FoXtrot, a system for filtering XML data
                 that combines the strengths of automata for efficient
                 filtering and distributed hash tables for building a
                 fully distributed system. Apart from
                 structural-matching, performed using automata, we also
                 discuss different methods for evaluating value-based
                 predicates. We perform an extensive experimental
                 evaluation of our system, FoXtrot, on a local cluster
                 and on the PlanetLab network and demonstrate that it
                 can index millions of user queries, achieving a high
                 indexing and filtering throughput. At the same time,
                 FoXtrot exhibits very good load-balancing properties
                 and improves its performance as we increase the size of
                 the network.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Dork:2012:NTW,
  author =       "Marian D{\"o}rk and Carey Williamson and Sheelagh
                 Carpendale",
  title =        "Navigating tomorrow's web: From searching and browsing
                 to visual exploration",
  journal =      j-TWEB,
  volume =       "6",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2344416.2344420",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Nov 6 19:07:49 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "We propose a new way of navigating the Web using
                 interactive information visualizations, and present
                 encouraging results from a large-scale Web study of a
                 visual exploration system. While the Web has become an
                 immense, diverse information space, it has also evolved
                 into a powerful software platform. We believe that the
                 established interaction techniques of searching and
                 browsing do not sufficiently utilize these advances,
                 since information seekers have to transform their
                 information needs into specific, text-based search
                 queries resulting in mostly text-based lists of
                 resources. In contrast, we foresee a new type of
                 information seeking that is high-level and more
                 engaging, by providing the information seeker with
                 interactive visualizations that give graphical
                 overviews and enable query formulation. Building on
                 recent work on faceted navigation, information
                 visualization, and exploratory search, we conceptualize
                 this type of information navigation as visual
                 exploration and evaluate a prototype Web-based system
                 that implements it. We discuss the results of a
                 large-scale, mixed-method Web study that provides a
                 better understanding of the potential benefits of
                 visual exploration on the Web, and its particular
                 performance challenges.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cambazoglu:2012:CBQ,
  author =       "B. Barla Cambazoglu and Ismail Sengor Altingovde and
                 Rifat Ozcan and {\"O}zg{\"u}r Ulusoy",
  title =        "Cache-Based Query Processing for Search Engines",
  journal =      j-TWEB,
  volume =       "6",
  number =       "4",
  pages =        "14:1--14:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2382616.2382617",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In practice, a search engine may fail to serve a query
                 due to various reasons such as hardware/network
                 failures, excessive query load, lack of matching
                 documents, or service contract limitations (e.g., the
                 query rate limits for third-party users of a search
                 service). In such scenarios, where the backend
                 search system is unable to generate answers to queries,
                 approximate answers can be generated by exploiting the
                 previously computed query results available in the
                 result cache of the search engine. In this work, we
                 propose two alternative strategies to implement this
                 cache-based query processing idea. The first strategy
                 aggregates the results of similar queries that are
                 previously cached in order to create synthetic results
                 for new queries. The second strategy forms an inverted
                 index over the textual information (i.e., query terms
                 and result snippets) present in the result cache and
                 uses this index to answer new queries. Both approaches
                 achieve reasonable result qualities compared to
                 processing queries with an inverted index built on the
                 collection.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Delac:2012:MSS,
  author =       "Goran Delac and Ivan Budiselic and Ivan Zuzak and Ivan
                 Skuliber and Tomislav Stefanec",
  title =        "A Methodology for {SIP} and {SOAP} Integration Using
                 Application-Specific Protocol Conversion",
  journal =      j-TWEB,
  volume =       "6",
  number =       "4",
  pages =        "15:1--15:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2382616.2382618",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In recent years, the ubiquitous demands for
                 cross-protocol application access are driving the need
                 for deeper integration between SIP and SOAP. In this
                 article we present a novel methodology for integrating
                 these two protocols. Through an analysis of properties
                 of SIP and SOAP we show that integration between these
                 protocols should be based on application-specific
                 converters. We describe a generic SIP/SOAP gateway that
                 implements message handling and network and storage
                 management while relying on application-specific
                 converters to define session management and message
                 mapping for a specific set of SIP and SOAP
                 communication nodes. In order to ease development of
                 these converters, we introduce an XML-based
                 domain-specific language for describing
                 application-specific conversion processes. We show how
                 conversion processes can be easily specified in the
                 language using message sequence diagrams of the desired
                 interaction. We evaluate the presented methodology
                 through performance analysis of the developed prototype
                 gateway and high-level comparison with other
                 solutions.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Jeon:2012:WCP,
  author =       "Myeongjae Jeon and Youngjae Kim and Jeaho Hwang and
                 Joonwon Lee and Euiseong Seo",
  title =        "Workload Characterization and Performance Implications
                 of Large-Scale Blog Servers",
  journal =      j-TWEB,
  volume =       "6",
  number =       "4",
  pages =        "16:1--16:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2382616.2382619",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "With the ever-increasing popularity of Social Network
                 Services (SNSs), an understanding of the
                 characteristics of these services and their effects on
                 the behavior of their host servers is critical.
                 However, there has been a lack of research on the
                 workload characterization of servers running SNS
                 applications such as blog services. To fill this void,
                 we empirically characterized real-world Web server logs
                 collected from one of the largest South Korean blog
                 hosting sites for 12 consecutive days. The logs consist
                 of more than 96 million HTTP requests and 4.7TB of
                 network traffic. Our analysis reveals the following:
                 (i) The transfer size of nonmultimedia files and blog
                 articles can be modeled using a truncated Pareto
                 distribution and a log-normal distribution,
                 respectively; (ii) user access for blog articles does
                 not show temporal locality, but is strongly biased
                 towards those posted with image or audio files. We
                 additionally discuss the potential performance
                 improvement through clustering of small files on a blog
                 page into contiguous disk blocks, which benefits from
                 the observed file access patterns. Trace-driven
                 simulations show that, on average, the suggested
                 approach achieves 60.6\% better system throughput and
                 reduces the processing time for file access by 30.8\%
                 compared to the best performance of the Ext4
                 filesystem.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Wilson:2012:BSG,
  author =       "Christo Wilson and Alessandra Sala and Krishna P. N.
                 Puttaswamy and Ben Y. Zhao",
  title =        "Beyond Social Graphs: User Interactions in Online
                 Social Networks and their Implications",
  journal =      j-TWEB,
  volume =       "6",
  number =       "4",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2382616.2382620",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Social networks are popular platforms for interaction,
                 communication, and collaboration between friends.
                 Researchers have recently proposed an emerging class of
                 applications that leverage relationships from social
                 networks to improve security and performance in
                 applications such as email, Web browsing, and overlay
                 routing. While these applications often cite social
                 network connectivity statistics to support their
                 designs, researchers in psychology and sociology have
                 repeatedly cast doubt on the practice of inferring
                 meaningful relationships from social network
                 connections alone. This leads to the question: ``Are
                 social links valid indicators of real user interaction?
                 If not, then how can we quantify these factors to form
                 a more accurate model for evaluating socially enhanced
                 applications?'' In this article, we address this
                 question through a detailed study of user interactions
                 in the Facebook social network. We propose the use of
                 ``interaction graphs'' to impart meaning to online
                 social links by quantifying user interactions. We
                 analyze interaction graphs derived from Facebook user
                 traces and show that they exhibit significantly lower
                 levels of the ``small-world'' properties present in
                 their social graph counterparts. This means that these
                 graphs have fewer ``supernodes'' with extremely high
                 degree, and overall graph diameter increases
                 significantly as a result. To quantify the impact of
                 our observations, we use both types of graphs to
                 validate several well-known social-based applications
                 that rely on graph properties to infuse new
                 functionality into Internet applications, including
                 Reliable Email (RE), SybilGuard, and the weighted
                 cascade influence maximization algorithm. The results
                 reveal new insights into each of these systems, and
                  confirm our hypothesis that to obtain realistic and
                  accurate results, ongoing studies of social
                  applications should use real indicators of user
                  interactions in lieu of social graphs.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Weerkamp:2012:EEC,
  author =       "Wouter Weerkamp and Krisztian Balog and Maarten de
                 Rijke",
  title =        "Exploiting External Collections for Query Expansion",
  journal =      j-TWEB,
  volume =       "6",
  number =       "4",
  pages =        "18:1--18:??",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2382616.2382621",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "A persisting challenge in the field of information
                 retrieval is the vocabulary mismatch between a user's
                 information need and the relevant documents. One way of
                 addressing this issue is to apply query modeling: to
                  add terms to the original query and reweight the terms.
                 In social media, where documents usually contain
                 creative and noisy language (e.g., spelling and
                 grammatical errors), query modeling proves difficult.
                 To address this, attempts to use external sources for
                 query modeling have been made and seem to be
                 successful. In this article we propose a general
                 generative query expansion model that uses external
                 document collections for term generation: the External
                 Expansion Model (EEM). The main rationale behind our
                 model is our hypothesis that each query requires its
                 own mixture of external collections for expansion and
                 that an expansion model should account for this. For
                 some queries we expect, for example, a news collection
                 to be most beneficial, while for other queries we could
                 benefit more by selecting terms from a general
                  encyclopedia. EEM allows for query-dependent weighting
                 of the external collections. We put our model to the
                 test on the task of blog post retrieval and we use four
                 external collections in our experiments: (i) a news
                 collection, (ii) a Web collection, (iii) Wikipedia, and
                 (iv) a blog post collection. Experiments show that EEM
                 outperforms query expansion on the individual
                 collections, as well as the Mixture of Relevance Models
                 that was previously proposed by Diaz and Metzler
                 [2006]. Extensive analysis of the results shows that
                 our naive approach to estimating query-dependent
                 collection importance works reasonably well and that,
                 when we use ``oracle'' settings, we see the full
                 potential of our model. We also find that the
                 query-dependent collection importance has more impact
                 on retrieval performance than the independent
                 collection importance (i.e., a collection prior).",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Wu:2013:MVC,
  author =       "Ou Wu and Weiming Hu and Lei Shi",
  title =        "Measuring the Visual Complexities of {Web} Pages",
  journal =      j-TWEB,
  volume =       "7",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435215.2435216",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Visual complexities (VisComs) of Web pages
                 significantly affect user experience, and automatic
                 evaluation can facilitate a large number of Web-based
                 applications. The construction of a model for measuring
                 the VisComs of Web pages requires the extraction of
                 typical features and learning based on labeled Web
                 pages. However, as far as the authors are aware, little
                 headway has been made on measuring VisCom in Web mining
                 and machine learning. The present article provides a
                 new approach combining Web mining techniques and
                 machine learning algorithms for measuring the VisComs
                 of Web pages. The structure of a Web page is first
                  analyzed, and the layout is then extracted. Treating a
                  Web page as a semistructured image, the approach
                  extracts three classes of features to construct a
                  feature vector.
                 The feature vector is fed into a learned measuring
                 function to calculate the VisCom of the page. In the
                 proposed approach of the present study, the type of the
                 measuring function and its learning depend on the
                  quantification strategy for VisCom. Aside from
                  representing VisCom with a category or a score, as in
                  existing work, this study presents a new strategy that
                  uses a distribution to quantify the VisCom of a Web
                  page.
                 Empirical evaluation suggests the effectiveness of the
                 proposed approach in terms of both features and
                 learning algorithms.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Hanson:2013:PWA,
  author =       "Vicki L. Hanson and John T. Richards",
  title =        "Progress on {Website} Accessibility?",
  journal =      j-TWEB,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435215.2435217",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Over 100 top-traffic and government websites from the
                 United States and United Kingdom were examined for
                  evidence of changes in accessibility indicators over
                 the 14-year period from 1999 to 2012, the longest
                 period studied to date. Automated analyses of WCAG 2.0
                 Level A Success Criteria found high percentages of
                 violations overall. Unlike more circumscribed studies,
                 however, these sites exhibited improvements over the
                 years on a number of accessibility indicators, with
                  government sites being less likely than top-traffic
                  sites to
                 have accessibility violations. Examination of the
                 causes of success and failure suggests that improving
                 accessibility may be due, in part, to changes in
                 website technologies and coding practices rather than a
                 focus on accessibility per se.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Baykan:2013:CST,
  author =       "Eda Baykan and Monika Henzinger and Ingmar Weber",
  title =        "A Comprehensive Study of Techniques for {URL}-Based
                 {Web} Page Language Classification",
  journal =      j-TWEB,
  volume =       "7",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435215.2435218",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Given only the URL of a Web page, can we identify its
                 language? In this article we examine this question.
                 URL-based language classification is useful when the
                 content of the Web page is not available or downloading
                 the content is a waste of bandwidth and time. We built
                 URL-based language classifiers for English, German,
                 French, Spanish, and Italian by applying a variety of
                 algorithms and features. As algorithms we used machine
                 learning algorithms which are widely applied for text
                  classification and state-of-the-art algorithms for
                  language
                 identification of text. As features we used words,
                 various sized n-grams, and custom-made features (our
                 novel feature set). We compared our approaches with two
                 baseline methods, namely classification by country code
                 top-level domains and classification by IP addresses of
                 the hosting Web servers. We trained and tested our
                 classifiers in a 10-fold cross-validation setup on a
                 dataset obtained from the Open Directory Project and
                 from querying a commercial search engine. We obtained
                 the lowest F1-measure for English (94) and the highest
                  F1-measure for German (98) with the best-performing
                 classifiers. We also evaluated the performance of our
                 methods: (i) on a set of Web pages written in Adobe
                 Flash and (ii) as part of a language-focused crawler.
                  In the first case, the content of the Web page is hard
                  to extract, and in the second, downloading pages of
                  the ``wrong'' language constitutes a waste of
                  bandwidth. In both settings the best classifiers have a
                 high accuracy with an F1-measure between 95 (for
                 English) and 98 (for Italian) for the Adobe Flash pages
                 and a precision between 90 (for Italian) and 97 (for
                 French) for the language-focused crawler.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Marriott:2013:HAT,
  author =       "Kim Marriott and Peter Moulder and Nathan Hurst",
  title =        "{HTML} Automatic Table Layout",
  journal =      j-TWEB,
  volume =       "7",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435215.2435219",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Sun May 5 09:27:25 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Automatic layout of tables is required in online
                 applications because of the need to tailor the layout
                 to the viewport width, choice of font, and dynamic
                 content. However, if the table contains text,
                 minimizing the height of the table for a fixed maximum
                 width is NP-hard. Thus, more efficient heuristic
                  algorithms are required. We evaluate the HTML table
                  layout recommendation and find that, while it generally
                  produces quite compact layouts, it is brittle and can
                  lead to quite uncompact layouts. We present an
                  alternative
                 heuristic algorithm. It uses a greedy strategy that
                 starts from the widest reasonable layout and repeatedly
                 chooses to narrow the column for which narrowing leads
                 to the least increase in table height. The algorithm is
                 simple, fast enough to be used in online applications,
                 and gives significantly more compact layout than is
                 obtained with HTML's recommended table layout
                 algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Anisetti:2013:TBS,
  author =       "Marco Anisetti and Claudio A. Ardagna and Ernesto
                 Damiani and Francesco Saonara",
  title =        "A test-based security certification scheme for {Web}
                 services",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460384",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The Service-Oriented Architecture (SOA) paradigm is
                 giving rise to a new generation of applications built
                 by dynamically composing loosely coupled autonomous
                 services. Clients (i.e., software agents acting on
                 behalf of human users or service providers)
                 implementing such complex applications typically search
                 and integrate services on the basis of their functional
                 requirements and of their trust in the service
                 suppliers. A major issue in this scenario relates to
                 the definition of an assurance technique allowing
                 clients to select services on the basis of their
                 nonfunctional requirements and increasing their
                 confidence that the selected services will satisfy such
                 requirements. In this article, we first present an
                 assurance solution that focuses on security and
                 supports a test-based security certification scheme for
                 Web services. The certification scheme is driven by the
                 security properties to be certified and relies upon a
                 formal definition of the service model. The evidence
                 supporting a certified property is computed using a
                 model-based testing approach that, starting from the
                 service model, automatically generates the test cases
                 to be used in the service certification. We also define
                 a set of indexes and metrics that evaluate the
                 assurance level and the quality of the certification
                 process. Finally, we present our evaluation toolkit and
                 experimental results obtained applying our
                 certification solution to a financial service
                 implementing the Interactive Financial eXchange (IFX)
                 standard.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Victor:2013:ETB,
  author =       "Patricia Victor and Nele Verbiest and Chris Cornelis
                 and Martine {De Cock}",
  title =        "Enhancing the trust-based recommendation process with
                 explicit distrust",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460385",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "When a Web application with a built-in recommender
                 offers a social networking component which enables its
                 users to form a trust network, it can generate more
                 personalized recommendations by combining user ratings
                 with information from the trust network. These are the
                 so-called trust-enhanced recommendation systems. While
                 research on the incorporation of trust for
                 recommendations is thriving, the potential of
                 explicitly stated distrust remains almost unexplored.
                 In this article, we introduce a distrust-enhanced
                 recommendation algorithm which has its roots in
                 Golbeck's trust-based weighted mean. Through
                 experiments on a set of reviews from Epinions.com, we
                 show that our new algorithm outperforms its standard
                 trust-only counterpart with respect to accuracy,
                 thereby demonstrating the positive effect that explicit
                 distrust can have on trust-based recommendations.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Yue:2013:MSI,
  author =       "Chuan Yue and Haining Wang",
  title =        "A measurement study of insecure {JavaScript} practices
                 on the {Web}",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460386",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "JavaScript is an interpreted programming language most
                 often used for enhancing webpage interactivity and
                 functionality. It has powerful capabilities to interact
                 with webpage documents and browser windows, however, it
                 has also opened the door for many browser-based
                 security attacks. Insecure engineering practices of
                 using JavaScript may not directly lead to security
                 breaches, but they can create new attack vectors and
                 greatly increase the risks of browser-based attacks. In
                 this article, we present the first measurement study on
                 insecure practices of using JavaScript on the Web. Our
                 focus is on the insecure practices of JavaScript
                 inclusion and dynamic generation, and we examine their
                 severity and nature on 6,805 unique websites. Our
                 measurement results reveal that insecure JavaScript
                 practices are common at various websites: (1) at least
                 66.4\% of the measured websites manifest the insecure
                 practices of including JavaScript files from external
                 domains into the top-level documents of their webpages;
                 (2) over 44.4\% of the measured websites use the
                 dangerous eval() function to dynamically generate and
                 execute JavaScript code on their webpages; and (3) in
                 JavaScript dynamic generation, using the
                 document.write() method and the innerHTML property is
                 much more popular than using the relatively secure
                 technique of creating script elements via DOM methods.
                 Our analysis indicates that safe alternatives to these
                 insecure practices exist in common cases and ought to
                 be adopted by website developers and administrators for
                 reducing potential security risks.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Su:2013:UQI,
  author =       "Weifeng Su and Hejun Wu and Yafei Li and Jing Zhao and
                 Frederick H. Lochovsky and Hongmin Cai and Tianqiang
                 Huang",
  title =        "Understanding query interfaces by statistical
                 parsing",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460387",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Users submit queries to an online database via its
                 query interface. Query interface parsing, which is
                  important for many applications, aims to understand
                  the query capabilities of a query interface. Since
                  most query
                 interfaces are organized hierarchically, we present a
                 novel query interface parsing method, StatParser
                 (Statistical Parser), to automatically extract the
                 hierarchical query capabilities of query interfaces.
                 StatParser automatically learns from a set of parsed
                 query interfaces and parses new query interfaces.
                 StatParser starts from a small grammar and enhances the
                 grammar with a set of probabilities learned from parsed
                 query interfaces under the maximum-entropy principle.
                 Given a new query interface, the probability-enhanced
                 grammar identifies the parse tree with the largest
                 global probability to be the query capabilities of the
                 query interface. Experimental results show that
                 StatParser very accurately extracts the query
                 capabilities and can effectively overcome the problems
                 of existing query interface parsers.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Diaz:2013:LEU,
  author =       "Oscar D{\'\i}az and Crist{\'o}bal Arellano and Maider
                 Azanza",
  title =        "A language for end-user {Web} augmentation: Caring for
                 producers and consumers alike",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460388",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Web augmentation is to the Web what augmented reality
                 is to the physical world: layering relevant
                 content/layout/navigation over the existing Web to
                 customize the user experience. This is achieved through
                 JavaScript (JS) using browser weavers (e.g.,
                  Greasemonkey). To date, over 43 million downloads of
                  Greasemonkey scripts attest to the vitality of this
                  movement. However, Web augmentation is hindered by
                 being programming intensive and prone to malware. This
                 prevents end-users from participating as both producers
                 and consumers of scripts: producers need to know JS,
                 consumers need to trust JS. This article aims at
                 promoting end-user participation in both roles. The
                 vision is for end-users to prosume (the act of
                 simultaneously caring for producing and consuming)
                 scripts as easily as they currently prosume their
                 pictures or videos. Encouraging production requires
                 more ``natural'' and abstract constructs. Promoting
                 consumption calls for augmentation scripts to be easier
                 to understand, share, and trust upon. To this end, we
                 explore the use of Domain-Specific Languages (DSLs) by
                 introducing Sticklet. Sticklet is an internal DSL on
                 JS, where JS generality is reduced for the sake of
                  learnability and reliability. Specifically, Web
                  augmentation is conceived as affixing to existing
                  websites (i.e., the wall) HTML fragments extracted
                  from either other sites or Web services (i.e., the
                  stickers). Sticklet targets hobby programmers as
                  producers, and
                 computer literates as consumers. From a producer
                 perspective, benefits are threefold. As a restricted
                 grammar on top of JS, Sticklet expressions are domain
                 oriented and more declarative than their JS
                 counterparts, hence speeding up development. As
                 syntactically correct JS expressions, Sticklet scripts
                 can be installed as traditional scripts and hence,
                 programmers can continue using existing JS tools. As
                 declarative expressions, they are easier to maintain,
                  and amenable to optimization. From a consumer
                 perspective, domain specificity brings
                 understandability (due to declarativeness), reliability
                 (due to built-in security), and ``consumability''
                 (i.e., installation/enactment/sharing of Sticklet
                 expressions are tuned to the shortage of time and
                 skills of the target audience). Preliminary evaluations
                 indicate that 77\% of the subjects were able to develop
                 new Sticklet scripts in less than thirty minutes while
                 84\% were able to consume these scripts in less than
                 ten minutes. Sticklet is available to download as a
                 Mozilla add-on.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Kaldeli:2013:CWS,
  author =       "Eirini Kaldeli and Ehsan Ullah Warriach and Alexander
                 Lazovik and Marco Aiello",
  title =        "Coordinating the web of services for a smart home",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460389",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Domotics, concerned with the realization of
                  intelligent home environments, is a novel field that
                  can benefit greatly from solutions inspired by
                 service-oriented principles to enhance the convenience
                 and security of modern home residents. In this work, we
                 present an architecture for a smart home, starting from
                 the lower device interconnectivity level up to the
                 higher application layers that undertake the load of
                 complex functionalities and provide a number of
                 services to end-users. We claim that in order for smart
                 homes to exhibit a genuinely intelligent behavior, the
                 ability to compute compositions of individual devices
                 automatically and dynamically is paramount. To this
                 end, we incorporate into the architecture a composition
                 component that employs artificial intelligence
                 domain-independent planning to generate compositions at
                 runtime, in a constantly evolving environment. We have
                 implemented a fully working prototype that realizes
                 such an architecture, and have evaluated it both in
                 terms of performance as well as from the end-user point
                 of view. The results of the evaluation show that the
                  service-oriented architectural design and the support
                  for dynamic compositions are quite efficient from the
                 technical point of view, and that the system succeeds
                 in satisfying the expectations and objectives of the
                 users.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Balakrishnan:2013:ART,
  author =       "Raju Balakrishnan and Subbarao Kambhampati and
                 Manishkumar Jha",
  title =        "Assessing relevance and trust of the deep web sources
                 and results based on inter-source agreement",
  journal =      j-TWEB,
  volume =       "7",
  number =       "2",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2460383.2460390",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:18 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Deep web search engines face the formidable challenge
                 of retrieving high-quality results from the vast
                 collection of searchable databases. Deep web search is
                 a two-step process of selecting the high-quality
                 sources and ranking the results from the selected
                  sources. Though there are existing methods for both
                 steps, they assess the relevance of the sources and the
                 results using the query-result similarity. When applied
                 to the deep web these methods have two deficiencies.
                 First is that they are agnostic to the correctness
                 (trustworthiness) of the results. Second, the
                 query-based relevance does not consider the importance
                 of the results and sources. These two considerations
                 are essential for the deep web and open collections in
                 general. Since a number of deep web sources provide
                  answers to any query, we conjecture that the
                 agreements between these answers are helpful in
                 assessing the importance and the trustworthiness of the
                 sources and the results. For assessing source quality,
                 we compute the agreement between the sources as the
                 agreement of the answers returned. While computing the
                 agreement, we also measure and compensate for the
                 possible collusion between the sources. This adjusted
                 agreement is modeled as a graph with sources at the
                 vertices. On this agreement graph, a quality score of a
                  source, which we call SourceRank, is calculated as the
                 stationary visit probability of a random walk. For
                 ranking results, we analyze the second-order agreement
                 between the results. Further extending SourceRank to
                 multidomain search, we propose a source ranking
                 sensitive to the query domains. Multiple
                 domain-specific rankings of a source are computed, and
                 these ranks are combined for the final ranking. We
                  perform extensive evaluations on online sources and on
                  hundreds of Google Base sources spanning multiple
                  domains. The
                 proposed result and source rankings are implemented in
                 the deep web search engine Factal. We demonstrate that
                 the agreement analysis tracks source corruption.
                 Further, our relevance evaluations show that our
                 methods improve precision significantly over Google
                 Base and the other baseline methods. The result ranking
                 and the domain-specific source ranking are evaluated
                 separately.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Nguyen:2013:FWT,
  author =       "Cam-Tu Nguyen and Natsuda Kaothanthong and Takeshi
                 Tokuyama and Xuan-Hieu Phan",
  title =        "A feature-word-topic model for image annotation and
                 retrieval",
  journal =      j-TWEB,
  volume =       "7",
  number =       "3",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2516633.2516634",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:20 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Image annotation is a process of finding appropriate
                 semantic labels for images in order to obtain a more
                 convenient way for indexing and searching images on the
                 Web. This article proposes a novel method for image
                 annotation based on combining feature-word
                 distributions, which map from visual space to word
                 space, and word-topic distributions, which form a
                 structure to capture label relationships for
                 annotation. We refer to this type of model as
                 Feature-Word-Topic models. The introduction of topics
                 allows us to efficiently take word associations, such
                 as {ocean, fish, coral} or {desert, sand, cactus}, into
                 account for image annotation. Unlike previous
                 topic-based methods, we do not consider topics as joint
                 distributions of words and visual features, but as
                 distributions of words only. Feature-word distributions
                 are utilized to define weights in computation of topic
                 distributions for annotation. By doing so, topic models
                 in text mining can be applied directly in our method.
                  Our Feature-word-topic model, which exploits Gaussian
                  mixtures for feature-word distributions and
                  probabilistic Latent Semantic Analysis (pLSA) for
                  word-topic distributions, obtains promising results in
                  image annotation and
                 retrieval.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Vargiu:2013:ICA,
  author =       "Eloisa Vargiu and Alessandro Giuliani and Giuliano
                 Armano",
  title =        "Improving contextual advertising by adopting
                 collaborative filtering",
  journal =      j-TWEB,
  volume =       "7",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2516633.2516635",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:20 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Contextual advertising can be viewed as an information
                 filtering task aimed at selecting suitable ads to be
                 suggested to the final ``user'', that is, the Web page
                  at hand. Starting from this insight, in this article we
                 propose a novel system, which adopts a collaborative
                 filtering approach to perform contextual advertising.
                 In particular, given a Web page, the system relies on
                 collaborative filtering to classify the page content
                 and to suggest suitable ads accordingly. Useful
                 information is extracted from ``inlinks'', that is,
                  similar pages that link to the Web page at hand. In so
                 doing, collaborative filtering is used in a
                 content-based setting, giving rise to a hybrid
                  contextual advertising system. The implemented system
                  has been evaluated on about 15,000 Web pages extracted
                  from the Open Directory Project.
                 Comparative experiments with a content-based system
                 have been performed. The corresponding results
                 highlight that the proposed system performs better. A
                 suitable case study is also provided to enable the
                 reader to better understand how the system works and
                 its effectiveness.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Conti:2013:VPS,
  author =       "Mauro Conti and Arbnor Hasani and Bruno Crispo",
  title =        "Virtual private social networks and a {Facebook}
                 implementation",
  journal =      j-TWEB,
  volume =       "7",
  number =       "3",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2516633.2516636",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:20 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The popularity of Social Networking Sites (SNS) is
                 growing rapidly, with the largest sites serving
                 hundreds of millions of users and their private
                 information. The privacy settings of these SNSs do not
                 allow the user to avoid sharing some information (e.g.,
                 name and profile picture) with all the other users.
                 Also, no matter the privacy settings, this information
                  is always shared with the SNS (which could sell this
                 information or be hacked). To mitigate these threats,
                 we recently introduced the concept of Virtual Private
                 Social Networks (VPSNs). In this work we propose the
                 first complete architecture and implementation of VPSNs
                 for Facebook. In particular, we address an important
                  problem left unexplored in our previous research: the
                  automatic propagation of updated profiles to all
                 the members of the same VPSN. Furthermore, we made an
                 in-depth study on performance and implemented several
                  optimizations to reduce the impact of VPSN on user
                 experience. The proposed solution is lightweight,
                  completely distributed, does not depend on
                  collaboration from Facebook, does not have a central
                  point of failure, offers (with some limitations) the
                  same functionality as Facebook, and, apart from some
                  simple settings, is almost transparent to the user.
                  Through experiments with an extended set of
                 parameters, we have confirmed the feasibility of the
                 proposal and have shown a very limited time-overhead
                 experienced by the user while browsing Facebook
                 pages.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Cambazoglu:2013:TBI,
  author =       "B. Barla Cambazoglu and Enver Kayaaslan and Simon
                 Jonassen and Cevdet Aykanat",
  title =        "A term-based inverted index partitioning model for
                 efficient distributed query processing",
  journal =      j-TWEB,
  volume =       "7",
  number =       "3",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2516633.2516637",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:20 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In a shared-nothing, distributed text retrieval
                 system, queries are processed over an inverted index
                 that is partitioned among a number of index servers. In
                 practice, the index is either document-based or
                 term-based partitioned. This choice is made depending
                 on the properties of the underlying hardware
                 infrastructure, query traffic distribution, and some
                 performance and availability constraints. In query
                 processing on retrieval systems that adopt a term-based
                 index partitioning strategy, the high communication
                 overhead due to the transfer of large amounts of data
                 from the index servers forms a major performance
                 bottleneck, deteriorating the scalability of the entire
                 distributed retrieval system. In this work, to
                 alleviate this problem, we propose a novel inverted
                 index partitioning model that relies on hypergraph
                 partitioning. In the proposed model, concurrently
                 accessed index entries are assigned to the same index
                 servers, based on the inverted index access patterns
                 extracted from the past query logs. The model aims to
                 minimize the communication overhead that will be
                 incurred by future queries while maintaining the
                 computational load balance among the index servers. We
                 evaluate the performance of the proposed model through
                 extensive experiments using a real-life text collection
                 and a search query sample. Our results show that
                 considerable performance gains can be achieved relative
                 to the term-based index partitioning strategies
                  previously proposed in the literature. In most cases,
                 however, the performance remains inferior to that
                 attained by document-based partitioning.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Weninger:2013:PPF,
  author =       "Tim Weninger and Thomas J. Johnston and Jiawei Han",
  title =        "The parallel path framework for entity discovery on
                 the web",
  journal =      j-TWEB,
  volume =       "7",
  number =       "3",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2516633.2516638",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:20 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "It has been a dream of the database and Web
                 communities to reconcile the unstructured nature of the
                 World Wide Web with the neat, structured schemas of the
                 database paradigm. Even though databases are currently
                 used to generate Web content in some sites, the schemas
                 of these databases are rarely consistent across a
                 domain. This makes the comparison and aggregation of
                 information from different domains difficult. We aim to
                 make an important step towards resolving this disparity
                 by using the structural and relational information on
                 the Web to (1) extract Web lists, (2) find
                 entity-pages, (3) map entity-pages to a database, and
                 (4) extract attributes of the entities. Specifically,
                 given a Web site and an entity-page (e.g., university
                 department and faculty member home page) we seek to
                 find all of the entity-pages of the same type (e.g.,
                 all faculty members in the department), as well as
                 attributes of the specific entities (e.g., their phone
                 numbers, email addresses, office numbers). To do this,
                 we propose a Web structure mining method which grows
                 parallel paths through the Web graph and DOM trees and
                 propagates relevant attribute information forward. We
                 show that by utilizing these parallel paths we can
                 efficiently discover entity-pages and attributes.
                 Finally, we demonstrate the accuracy of our method with
                 a large case study.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Liu:2013:SCB,
  author =       "Liwei Liu and Freddy Lecue and Nikolay Mehandjiev",
  title =        "Semantic content-based recommendation of software
                 services using context",
  journal =      j-TWEB,
  volume =       "7",
  number =       "3",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2516633.2516639",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:20 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The current proliferation of software services means
                 users should be supported when selecting one service
                 out of the many which meet their needs. Recommender
                 Systems provide such support for selecting products and
                 conventional services, yet their direct application to
                 software services is not straightforward, because of
                 the current scarcity of available user feedback, and
                 the need to fine-tune software services to the context
                 of intended use. In this article, we address these
                 issues by proposing a semantic content-based
                 recommendation approach that analyzes the context of
                 intended service use to provide effective
                 recommendations in conditions of scarce user feedback.
                 The article ends with two experiments based on a
                 realistic set of semantic services. The first
                 experiment demonstrates how the proposed semantic
                 content-based approach can produce effective
                 recommendations using semantic reasoning over service
                 specifications by comparing it with three other
                 approaches. The second experiment demonstrates the
                 effectiveness of the proposed context analysis
                 mechanism by comparing the performance of both
                 context-aware and plain versions of our semantic
                 content-based approach, benchmarked against
                 user-performed selection informed by context.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Jiang:2013:ULI,
  author =       "Jing Jiang and Christo Wilson and Xiao Wang and
                 Wenpeng Sha and Peng Huang and Yafei Dai and Ben Y.
                 Zhao",
  title =        "Understanding latent interactions in online social
                 networks",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2517040",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Popular online social networks (OSNs) like Facebook
                 and Twitter are changing the way users communicate and
                 interact with the Internet. A deep understanding of
                 user interactions in OSNs can provide important
                 insights into questions of human social behavior and
                 into the design of social platforms and applications.
                 However, recent studies have shown that a majority of
                 user interactions on OSNs are latent interactions, that
                 is, passive actions, such as profile browsing, that
                 cannot be observed by traditional measurement
                 techniques. In this article, we seek a deeper
                 understanding of both active and latent user
                 interactions in OSNs. For quantifiable data on latent
                 user interactions, we perform a detailed measurement
                 study on Renren, the largest OSN in China with more
                 than 220 million users to date. All friendship links in
                 Renren are public, allowing us to exhaustively crawl a
                 connected graph component of 42 million users and 1.66
                 billion social links in 2009. Renren also keeps
                 detailed, publicly viewable visitor logs for each user
                 profile. We capture detailed histories of profile
                 visits over a period of 90 days for users in the Peking
                 University Renren network and use statistics of profile
                 visits to study issues of user profile popularity,
                 reciprocity of profile visits, and the impact of
                 content updates on user popularity. We find that latent
                 interactions are much more prevalent and frequent than
                 active events, are nonreciprocal in nature, and that
                 profile popularity is correlated with page views of
                 content rather than with quantity of content updates.
                 Finally, we construct latent interaction graphs as
                 models of user browsing behavior and compare their
                 structural properties, evolution, community structure,
                 and mixing times against those of both active
                 interaction graphs and social graphs.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Quarteroni:2013:BKA,
  author =       "Silvia Quarteroni and Marco Brambilla and Stefano
                 Ceri",
  title =        "A bottom-up, knowledge-aware approach to integrating
                 and querying {Web} data services",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2493536",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "As a wealth of data services is becoming available on
                 the Web, building and querying Web applications that
                 effectively integrate their content is increasingly
                 important. However, schema integration and ontology
                 matching with the aim of registering data services
                 often require a knowledge-intensive, tedious, and
                 error-prone manual process. We tackle this issue by
                 presenting a bottom-up, semi-automatic service
                 registration process that refers to an external
                 knowledge base and uses simple text processing
                 techniques in order to minimize and possibly avoid the
                 contribution of domain experts in the annotation of
                 data services. The first by-product of this process is
                 a representation of the domain of data services as an
                 entity-relationship diagram, whose entities are named
                 after concepts of the external knowledge base matching
                 service terminology rather than being manually created
                 to accommodate an application-specific ontology.
                 Second, a three-layer annotation of service semantics
                 (service interfaces, access patterns, service marts)
                 describing how services ``play'' with such domain
                 elements is also automatically constructed at
                 registration time. When evaluated against heterogeneous
                 existing data services and with a synthetic service
                 dataset constructed using Google Fusion Tables, the
                 approach yields good results in terms of data
                 representation accuracy. We subsequently demonstrate
                 that natural language processing methods can be used to
                 decompose and match simple queries to the data services
                 represented in three layers according to the preceding
                 methodology with satisfactory results. We show how
                 semantic annotations are used at query time to convert
                 the user's request into an executable logical query.
                 Globally, our findings show that the proposed
                 registration method is effective in creating a uniform
                 semantic representation of data services, suitable for
                 building Web applications and answering search
                 queries.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
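%%%
%%% The Quarteroni et al. entry above annotates data-service attributes
%%% with concepts from an external knowledge base using simple text
%%% processing. A toy Python sketch of such a matching step, with a
%%% made-up token-overlap heuristic (the article's pipeline is richer):
%%%
%%%   def annotate_attributes(attributes, kb_concepts):
%%%       """Map each attribute name to the knowledge-base concept with
%%%       the largest normalized-token overlap, if any."""
%%%       def tokens(s):
%%%           return set(s.lower().replace("_", " ").split())
%%%       annotation = {}
%%%       for attr in attributes:
%%%           best = max(kb_concepts,
%%%                      key=lambda c: len(tokens(attr) & tokens(c)),
%%%                      default=None)
%%%           if best is not None and tokens(attr) & tokens(best):
%%%               annotation[attr] = best
%%%       return annotation
%%%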

@Article{Leiva:2013:WBB,
  author =       "Luis A. Leiva and Roberto Viv{\'o}",
  title =        "{Web} browsing behavior analysis and interactive
                 hypervideo",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2529995.2529996",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Processing data on any sort of user interaction is
                 well known to be cumbersome and time consuming.
                 In order to assist researchers in easily inspecting
                 fine-grained browsing data, current tools usually
                 display user interactions as mouse cursor tracks, a
                 video-like visualization scheme. However, to date,
                 traditional online video inspection has not explored
                 the full capabilities of hypermedia and interactive
                 techniques. In response to this need, we have developed
                 SMT2$\epsilon$, a Web-based tracking system for
                 analyzing browsing behavior using feature-rich
                 hypervideo visualizations. We compare our system to
                 related work in academia and industry, showing that
                 ours features unprecedented visualization capabilities.
                 We also show that SMT2$\epsilon$ efficiently captures
                 browsing data and is perceived by users to be both
                 helpful and usable. A series of prediction experiments
                 illustrates that raw cursor data are accessible and can
                 be easily handled, providing evidence that the data can
                 be used to construct and verify research hypotheses.
                 Considering its limitations, it is our hope that
                 SMT2$\epsilon$ will assist researchers, usability
                 practitioners, and other professionals interested in
                 understanding how users browse the Web.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bing:2013:RDS,
  author =       "Lidong Bing and Wai Lam and Tak-Lam Wong",
  title =        "Robust detection of semi-structured web records using
                 a {DOM} structure-knowledge-driven model",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "21:1--21:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2508434",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Web data record extraction aims at extracting a set of
                 similar object records from a single webpage. These
                 records have similar attributes or fields and are
                 presented with a regular format in a coherent region of
                 the page. To tackle this problem, most existing works
                 analyze the DOM tree of an input page. One major
                 limitation of these methods is that the lack of a
                 global view in detecting data records from an input
                 page results in a myopic decision. Their brute-force
                 search for various types of records degrades flexibility
                 and robustness. We propose a
                 Structure-Knowledge-Oriented Global Analysis (Skoga)
                 framework which can perform robust detection of
                 different kinds of data records and record regions. The
                 major component of the Skoga framework is a DOM
                 structure-knowledge-driven detection model which can
                 conduct a global analysis on the DOM structure to
                 achieve effective detection. The DOM structure
                 knowledge consists of background knowledge as well as
                 statistical knowledge capturing different
                 characteristics of data records and record regions, as
                 exhibited in the DOM structure. The background
                 knowledge encodes the semantics of labels indicating
                 general constituents of data records and regions. The
                 statistical knowledge is represented by some carefully
                 designed features that capture different
                 characteristics of a single node or a node group in the
                 DOM. The feature weights are determined using a
                 development dataset via a parameter estimation
                 algorithm based on a structured output support vector
                 machine. An optimization method based on the
                 divide-and-conquer principle is developed making use of
                 the DOM structure knowledge to quantitatively infer and
                 recognize appropriate records and regions for a page.
                 Extensive experiments have been conducted on four
                 datasets. The experimental results demonstrate that our
                 framework achieves higher accuracy compared with
                 state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
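%%%
%%% The Bing et al. entry above detects data-record regions in a DOM
%%% tree. As a much simpler illustration of the underlying intuition
%%% (regular, repeated structure signals a record region), a Python
%%% sketch over hypothetical child-node structure signatures; the Skoga
%%% framework itself is a learned global model, not this heuristic:
%%%
%%%   def record_regions(child_signatures, min_run=2):
%%%       """child_signatures: structural signature per child node,
%%%       e.g. ["div>a,img", "div>a,img", "p"]. Returns (start, end)
%%%       index pairs of runs of identical signatures."""
%%%       regions, start = [], 0
%%%       for i in range(1, len(child_signatures) + 1):
%%%           if (i == len(child_signatures)
%%%                   or child_signatures[i] != child_signatures[start]):
%%%               if i - start >= min_run:
%%%                   regions.append((start, i - 1))
%%%               start = i
%%%       return regions
%%%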

@Article{Liao:2013:VAC,
  author =       "Zhen Liao and Daxin Jiang and Jian Pei and Yalou Huang
                 and Enhong Chen and Huanhuan Cao and Hang Li",
  title =        "A {vlHMM} approach to context-aware search",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "22:1--22:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2490255",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Capturing the context of a user's query from the
                 previous queries and clicks in the same session leads
                 to a better understanding of the user's information
                 need. A context-aware approach to document reranking,
                 URL recommendation, and query suggestion may
                 substantially improve users' search experience. In this
                 article, we propose a general approach to context-aware
                 search by learning a variable length hidden Markov
                 model (vlHMM) from search sessions extracted from log
                 data. While the mathematical model is powerful, the
                 huge amounts of log data present great challenges. We
                 develop several distributed learning techniques to
                 learn a very large vlHMM under the map-reduce
                 framework. Moreover, we construct feature vectors for
                 each state of the vlHMM model to handle users' novel
                 queries not covered by the training data. We test our
                 approach on a raw dataset consisting of 1.9 billion
                 queries, 2.9 billion clicks, and 1.2 billion search
                 sessions before filtering, and evaluate the
                 effectiveness of the vlHMM learned from the real data
                 on three search applications: document reranking, query
                 suggestion, and URL recommendation. The experiment
                 results validate the effectiveness of vlHMM in the
                 applications of document reranking, URL recommendation,
                 and query suggestion.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
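%%%
%%% The Liao et al. entry above learns a variable-length hidden Markov
%%% model over search sessions. The variable-length-context idea can be
%%% illustrated with a far simpler back-off model in Python (counts of
%%% next queries per context suffix; the vlHMM with hidden states and
%%% distributed training is substantially more involved):
%%%
%%%   from collections import defaultdict, Counter
%%%
%%%   class VarLenContextModel:
%%%       def __init__(self, max_len=3):
%%%           self.max_len = max_len
%%%           self.counts = defaultdict(Counter)  # context -> next-query counts
%%%
%%%       def train(self, sessions):
%%%           for s in sessions:
%%%               for i, q in enumerate(s):
%%%                   for k in range(1, self.max_len + 1):
%%%                       if i - k >= 0:
%%%                           self.counts[tuple(s[i - k:i])][q] += 1
%%%
%%%       def suggest(self, context, n=3):
%%%           """Longest matching context suffix wins (back-off)."""
%%%           for k in range(min(self.max_len, len(context)), 0, -1):
%%%               c = self.counts.get(tuple(context[-k:]))
%%%               if c:
%%%                   return [q for q, _ in c.most_common(n)]
%%%           return []
%%%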

@Article{White:2013:CBD,
  author =       "Ryen W. White and Eric Horvitz",
  title =        "Captions and biases in diagnostic search",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "23:1--23:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2486040",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "People frequently turn to the Web with the goal of
                 diagnosing medical symptoms. Studies have shown that
                 diagnostic search can often lead to anxiety about the
                 possibility that symptoms are explained by the presence
                 of rare, serious medical disorders, rather than far
                 more common benign syndromes. We study the influence of
                 the appearance of potentially-alarming content, such as
                 severe illnesses or serious treatment options
                 associated with the queried-for symptoms, in captions
                 comprising titles, snippets, and URLs. We explore
                 whether users are drawn to results with
                 potentially-alarming caption content, and if so, the
                 implications of such attraction for the design of
                 search engines. We specifically study the influence of
                 the content of search result captions shown in response
                 to symptom searches on search-result click-through
                 behavior. We show that users are significantly more
                 likely to examine and click on captions containing
                 potentially-alarming medical terminology such as
                 ``heart attack'' or ``medical emergency'' independent
                 of result rank position and well-known positional
                 biases in users' search examination behaviors. The
                 findings provide insights about the possible effects of
                 displaying implicit correlates of searchers' goals in
                 search-result captions, such as unexpressed concerns
                 and fears. As an illustration of the potential utility
                 of these results, we developed and evaluated an
                 enhanced click prediction model that incorporates
                 potentially-alarming caption features and show that it
                 significantly outperforms models that ignore caption
                 content. Beyond providing additional understanding of
                 the effects of Web content on medical concerns, the
                 methods and findings have implications for search
                 engine design. As part of our discussion on the
                 implications of this research, we propose procedures
                 for generating more representative captions that may be
                 less likely to cause alarm, as well as methods for
                 learning to more appropriately rank search results from
                 logged search behavior, for example, by also
                 considering the presence of potentially-alarming
                 content in the captions that motivate observed clicks
                 and down-weighting clicks seemingly driven by
                 searchers' health anxieties.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
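%%%
%%% The White and Horvitz entry above builds a click prediction model
%%% with potentially-alarming caption features. A skeletal Python
%%% version of such a feature-based logistic click model; the term list
%%% and weights below are placeholders, not values from the article:
%%%
%%%   import math
%%%
%%%   ALARMING_TERMS = {"heart attack", "medical emergency"}  # illustrative
%%%
%%%   def caption_features(caption, rank):
%%%       text = caption.lower()
%%%       return {"alarming": any(t in text for t in ALARMING_TERMS),
%%%               "rank": rank}
%%%
%%%   def click_probability(f, w_alarm=1.2, w_rank=-0.3, bias=-1.0):
%%%       """Logistic model: alarming captions raise, and larger rank
%%%       numbers (lower positions) reduce, the click probability."""
%%%       z = bias + w_alarm * f["alarming"] + w_rank * f["rank"]
%%%       return 1.0 / (1.0 + math.exp(-z))
%%%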

@Article{Lee:2013:SCA,
  author =       "Jung-Hyun Lee and Jongwoo Ha and Jin-Yong Jung and
                 Sangkeun Lee",
  title =        "Semantic contextual advertising based on the open
                 directory project",
  journal =      j-TWEB,
  volume =       "7",
  number =       "4",
  pages =        "24:1--24:??",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2529995.2529997",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:21 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Contextual advertising seeks to place relevant textual
                 ads within the content of generic webpages. In this
                 article, we explore a novel semantic approach to
                 contextual advertising. This consists of three tasks:
                 (1) building a well-organized hierarchical taxonomy of
                 topics, (2) developing a robust classifier for
                 effectively finding the topics of pages and ads, and
                 (3) ranking ads based on the topical relevance to
                 pages. First, we heuristically build our own taxonomy
                 of topics from the Open Directory Project (ODP).
                 Second, we investigate how to increase classification
                 accuracy by taking the unique characteristics of the
                 ODP into account. Last, we measure the topical
                 relevance of ads by applying a link analysis technique
                 to the similarity graph carefully derived from our
                 taxonomy. Experiments show that our classification
                 method improves the performance of Ma-F$_1$ by as much
                 as 25.7\% over the baseline classifier. In addition,
                 our ranking method enhances the relevance of ads
                 substantially, up to 10\% in terms of precision at $k$,
                 compared to a representative strategy.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
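%%%
%%% The Lee et al. entry above ranks ads by topical relevance between
%%% page and ad classifications. A bare-bones Python sketch of ranking
%%% by similarity of topic distributions (the article derives relevance
%%% from a similarity graph over its ODP-based taxonomy, which this
%%% cosine shortcut does not capture):
%%%
%%%   import math
%%%
%%%   def cosine(u, v):
%%%       dot = sum(u.get(k, 0.0) * v.get(k, 0.0) for k in set(u) | set(v))
%%%       nu = math.sqrt(sum(x * x for x in u.values()))
%%%       nv = math.sqrt(sum(x * x for x in v.values()))
%%%       return dot / (nu * nv) if nu and nv else 0.0
%%%
%%%   def rank_ads(page_topics, ads):
%%%       """ads: (ad_id, topic_distribution) pairs; most relevant first."""
%%%       return sorted(ads, key=lambda a: cosine(page_topics, a[1]),
%%%                     reverse=True)
%%%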

@Article{Huang:2013:UEQ,
  author =       "Xiaodi Huang",
  title =        "{UsageQoS}: Estimating the {QoS} of {Web} Services
                 through Online User Communities",
  journal =      j-TWEB,
  volume =       "8",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2532635",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Services are an indispensable component in cloud
                 computing. Web services are particularly important. As
                 an increasing number of Web services provide
                 equivalent functions, one common issue faced by users
                 is the selection of the most appropriate one based on
                 quality. This article presents a conceptual framework
                 that characterizes the quality of Web services, an
                 algorithm that quantifies them, and a system
                 architecture that ranks Web services by using the
                 proposed algorithm. In particular, the algorithm,
                 called UsageQoS, which computes quality-of-service
                 (QoS) scores for Web services within a community,
                 makes use of the usage frequencies of Web services.
                 The frequencies are defined as the numbers of times a
                 service is invoked by other services in a given time
                 period. The UsageQoS
                 algorithm is able to optionally take user ratings as
                 its initial input. The proposed approach has been
                 validated by extensively experimenting on several
                 datasets, including two real datasets. The results of
                 the experiments have demonstrated that our approach is
                 capable of estimating QoS parameters of Web services,
                 regardless of whether user ratings are available or
                 not.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
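%%%
%%% The Huang entry above scores service quality from usage
%%% frequencies, optionally seeded with user ratings. A compact Python
%%% sketch of that scoring idea; the even 50/50 blend is an arbitrary
%%% choice for illustration, not the UsageQoS algorithm itself:
%%%
%%%   from collections import Counter
%%%
%%%   def usage_scores(invocations, ratings=None):
%%%       """invocations: (caller, callee) service calls in a time
%%%       window; ratings: optional map of service -> rating in [0, 1]."""
%%%       freq = Counter(callee for _, callee in invocations)
%%%       total = sum(freq.values()) or 1
%%%       scores = {s: n / total for s, n in freq.items()}
%%%       if ratings:
%%%           scores = {s: 0.5 * scores.get(s, 0.0)
%%%                        + 0.5 * ratings.get(s, 0.0)
%%%                     for s in set(scores) | set(ratings)}
%%%       return scores
%%%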

@Article{Weber:2013:FBW,
  author =       "Ingo Weber and Hye-Young Paik and Boualem Benatallah",
  title =        "Form-Based {Web} Service Composition for Domain
                 Experts",
  journal =      j-TWEB,
  volume =       "8",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2542168",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "In many cases, it is not cost effective to automate
                 business processes which affect a small number of
                 people and/or change frequently. We present a novel
                 approach for enabling domain experts to model and
                 deploy such processes from their respective domain as
                 Web service compositions. The approach builds on
                 user-editable service naming and on representing Web
                 services as forms. On this basis, the approach provides
                 a visual composition language with a targeted
                 restriction of control-flow expressivity, process
                 simulation, automated process verification mechanisms,
                 and code generation for executing orchestrations. A
                 Web-based service composition prototype implements this
                 approach, including a WS-BPEL code generator. A small
                 lab user study with 14 participants showed promising
                 results for the usability of the system, even for
                 nontechnical domain experts.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Ozcan:2013:SCH,
  author =       "Rifat Ozcan and Ismail Sengor Altingovde and B. Barla
                 Cambazoglu and {\"O}zg{\"u}r Ulusoy",
  title =        "Second Chance: a Hybrid Approach for Dynamic Result
                 Caching and Prefetching in Search Engines",
  journal =      j-TWEB,
  volume =       "8",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2536777",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Web search engines are known to cache the results of
                 previously issued queries. The stored results typically
                 contain the document summaries and some data that is
                 used to construct the final search result page returned
                 to the user. An alternative strategy is to store in the
                 cache only the result document IDs, which take much
                 less space, allowing results of more queries to be
                 cached. These two strategies lead to an interesting
                 trade-off between the hit rate and the average query
                 response latency. In this work, in order to exploit
                 this trade-off, we propose a hybrid result caching
                 strategy where a dynamic result cache is split into two
                 sections: an HTML cache and a docID cache. Moreover,
                 using a realistic cost model, we evaluate the
                 performance of different result prefetching strategies
                 for the proposed hybrid cache and the baseline
                 HTML-only cache. Finally, we propose a machine learning
                 approach to predict singleton queries, which occur only
                 once in the query stream. We show that when the
                 proposed hybrid result caching strategy is coupled with
                 the singleton query predictor, the hit rate is further
                 improved.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
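%%%
%%% The Ozcan et al. entry above splits a dynamic result cache into an
%%% HTML section and a docID section. A minimal Python sketch of such a
%%% two-section LRU cache (capacities and eviction policy here are
%%% illustrative; the article's prefetching and singleton-query
%%% prediction are omitted):
%%%
%%%   from collections import OrderedDict
%%%
%%%   class HybridResultCache:
%%%       def __init__(self, html_slots=100, docid_slots=1000):
%%%           self.html, self.docid = OrderedDict(), OrderedDict()
%%%           self.html_slots, self.docid_slots = html_slots, docid_slots
%%%
%%%       def get(self, query):
%%%           # An HTML hit is final; a docID hit still requires
%%%           # regenerating snippets before the page can be served.
%%%           for section in (self.html, self.docid):
%%%               if query in section:
%%%                   section.move_to_end(query)   # LRU refresh
%%%                   return section[query]
%%%           return None
%%%
%%%       def _put(self, section, slots, query, value):
%%%           section[query] = value
%%%           section.move_to_end(query)
%%%           if len(section) > slots:
%%%               section.popitem(last=False)      # evict LRU entry
%%%
%%%       def put_html(self, query, page):
%%%           self._put(self.html, self.html_slots, query, page)
%%%
%%%       def put_docids(self, query, ids):
%%%           self._put(self.docid, self.docid_slots, query, ids)
%%%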

@Article{Sherkat:2013:ETS,
  author =       "Reza Sherkat and Jing Li and Nikos Mamoulis",
  title =        "Efficient Time-Stamped Event Sequence Anonymization",
  journal =      j-TWEB,
  volume =       "8",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2532643",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "With the rapid growth of applications which generate
                 timestamped sequences (click streams, GPS trajectories,
                 RFID sequences), sequence anonymization has become an
                 important problem when such data is to be published or
                 shared. Existing trajectory anonymization
                 techniques disregard the importance of time or the
                 sensitivity of events. This article presents, to our
                 knowledge, the first thorough study of time-stamped
                 event sequence anonymization. We propose a novel and tunable
                 generalization framework tailored to event sequences.
                 We generalize time stamps using time intervals and
                 events using a taxonomy which models the domain
                 semantics. We consider two scenarios: (i) sharing the
                 data with a single receiver (the SSR setting), where
                 the receiver's background knowledge is confined to a
                 set of time stamps and time generalization suffices,
                 and (ii) sharing the data with colluding receivers (the
                 SCR setting), where time generalization should be
                 combined with event generalization. For both cases, we
                 propose appropriate anonymization methods that prevent
                 both user identification and event prediction. To
                 achieve computational efficiency and scalability, we
                 propose optimization techniques for both cases using a
                 utility-based index, compact summaries, fast-to-compute
                 bounds for utility, and a novel taxonomy-aware distance
                 function. Extensive experiments confirm the
                 effectiveness of our approach compared with the state
                 of the art, in terms of information loss, range query
                 distortion, and preserving temporal causality patterns.
                 Furthermore, our experiments demonstrate efficiency and
                 scalability on large-scale real and synthetic
                 datasets.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
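%%%
%%% The Sherkat et al. entry above generalizes exact time stamps into
%%% intervals. The elementary building block, shown here in Python, is
%%% coarsening time stamps into aligned buckets (the article chooses
%%% interval widths to balance utility against privacy, which this
%%% fixed bucket width does not):
%%%
%%%   def generalize_timestamps(events, bucket_seconds=3600):
%%%       """events: (unix_ts, event) pairs -> ((start, end), event)
%%%       pairs with the exact time replaced by its interval."""
%%%       out = []
%%%       for ts, event in events:
%%%           start = (ts // bucket_seconds) * bucket_seconds
%%%           out.append(((start, start + bucket_seconds), event))
%%%       return out
%%%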

@Article{Bellido:2013:CFP,
  author =       "Jesus Bellido and Rosa Alarc{\'o}n and Cesare
                 Pautasso",
  title =        "Control-Flow Patterns for Decentralized {RESTful}
                 Service Composition",
  journal =      j-TWEB,
  volume =       "8",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2535911",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The REST architectural style has attracted a lot of
                 interest from industry due to the nonfunctional
                 properties it contributes to Web-based solutions.
                 SOAP/WSDL-based services, on the other hand, provide
                 tools and methodologies that allow the design and
                 development of software supporting complex service
                 arrangements, enabling complex business processes which
                 make use of well-known control-flow patterns. It is not
                 clear if and how such patterns should be modeled,
                 considering RESTful Web services that comply with the
                 statelessness, uniform interface and hypermedia
                 constraints. In this article, we analyze a set of
                 fundamental control-flow patterns in the context of
                 stateless compositions of RESTful services. We propose
                 a means of enabling their implementation using the HTTP
                 protocol and discuss the impact of our design choices
                 according to key REST architectural principles. We hope
                 to shed new light on the design of basic building
                 blocks for RESTful business processes.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Chelaru:2013:ADE,
  author =       "Sergiu Chelaru and Ismail Sengor Altingovde and Stefan
                 Siersdorfer and Wolfgang Nejdl",
  title =        "Analyzing, Detecting, and Exploiting Sentiment in
                 {Web} Queries",
  journal =      j-TWEB,
  volume =       "8",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2535525",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Thu Mar 13 08:28:23 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The Web contains an increasing amount of biased and
                 opinionated documents on politics, products, and
                 polarizing events. In this article, we present an
                 in-depth analysis of Web search queries for
                 controversial topics, focusing on query sentiment. To
                 this end, we conduct extensive user assessments and
                 discriminative term analyses, as well as a sentiment
                 analysis using the SentiWordNet thesaurus, a lexical
                 resource containing sentiment annotations. Furthermore,
                 in order to detect the sentiment expressed in queries,
                 we build different classifiers based on query texts,
                 query result titles, and snippets. We demonstrate the
                 virtue of query sentiment detection in two different
                 use cases. First, we define a query recommendation
                 scenario that employs sentiment detection of results to
                 recommend additional queries for polarized queries
                 issued by search engine users. The second application
                 scenario is controversial topic discovery, where query
                 sentiment classifiers are employed to discover
                 previously unknown topics that trigger both highly
                 positive and negative opinions among the users of a
                 search engine. For both use cases, the results of our
                 evaluations on real-world data are promising and show
                 the viability and potential of query sentiment analysis
                 in practical scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
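%%%
%%% The Chelaru et al. entry above classifies the sentiment of Web
%%% queries. A toy Python lexicon-based scorer conveys the flavor; the
%%% lexicon below is invented, whereas the article uses SentiWordNet
%%% and trained classifiers over query texts, titles, and snippets:
%%%
%%%   LEXICON = {"love": 0.8, "best": 0.6, "hate": -0.8, "scam": -0.7}
%%%
%%%   def query_sentiment(query):
%%%       """Average lexicon polarity of the query terms."""
%%%       hits = [LEXICON[t] for t in query.lower().split() if t in LEXICON]
%%%       score = sum(hits) / len(hits) if hits else 0.0
%%%       label = ("positive" if score > 0.1
%%%                else "negative" if score < -0.1 else "neutral")
%%%       return score, label
%%%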

@Article{Torres:2014:ASB,
  author =       "Sergio Duarte Torres and Ingmar Weber and Djoerd
                 Hiemstra",
  title =        "Analysis of Search and Browsing Behavior of Young
                 Users on the {Web}",
  journal =      j-TWEB,
  volume =       "8",
  number =       "2",
  pages =        "7:1--7:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2555595",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The Internet is increasingly used by young children
                 for all kinds of purposes. Nonetheless, there are not
                 many resources specifically designed for children on
                 the Internet, and most of the content online is designed for
                 grown-up users. This situation is problematic if we
                 consider the large differences between young users and
                 adults since their topic interests, computer skills,
                 and language capabilities evolve rapidly during
                 childhood. There is little research aimed at exploring
                 and measuring the difficulties that children encounter
                 on the Internet when searching for information and
                 browsing for content. In the first part of this work,
                 we employed query logs from a commercial search engine
                 to quantify the difficulties children of different ages
                 encounter on the Internet and to characterize the
                 topics that they search for. We employed query metrics
                 (e.g., the fraction of queries posed in natural
                 language), session metrics (e.g., the fraction of
                 abandoned sessions), and click activity (e.g., the
                 fraction of ad clicks). The search logs were also used
                 to retrace stages of child development. Concretely, we
                 looked for changes in interests (e.g., the distribution
                 of topics searched) and language development (e.g., the
                 readability of the content accessed and the vocabulary
                 size). In the second part of this work, we employed
                 toolbar logs from a commercial search engine to
                 characterize the browsing behavior of young users,
                 particularly to understand the activities on the
                 Internet that trigger search. We quantified the
                 proportion of browsing and search activity in the
                 toolbar sessions, and we estimated the likelihood that
                 a user carries out a search on the Web vertical and
                 multimedia verticals (i.e., videos and images) given
                 that the previous event is another search event or a
                 browsing event. We observed that these metrics clearly
                 demonstrate an increased level of confusion and
                 unsuccessful search sessions among children. We also
                 found a clear relation between the reading level of the
                 clicked pages and characteristics of the users such as
                 age and educational attainment. In terms of browsing
                 behavior, children were found to start their activities
                 on the Internet with a search engine (instead of
                 directly browsing content) more often than adults. We
                 also observed a significantly larger amount of browsing
                 activity for teenage users. Interestingly,
                 we also found that if children visit knowledge-related
                 Web sites (i.e., information-dense pages such as
                 Wikipedia articles), they subsequently do more Web
                 searches than adults. Additionally, children and
                 especially teenagers were found to have a greater
                 tendency to engage in multimedia search, which calls
                 for better aggregation of multimedia results into
                 current search result pages.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
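%%%
%%% The Torres et al. entry above relies on session metrics such as
%%% the fraction of abandoned sessions. One such metric in Python,
%%% under an assumed minimal session encoding (a session is a list of
%%% event types, "query" or "click"):
%%%
%%%   def abandoned_fraction(sessions):
%%%       """Fraction of sessions containing no click event."""
%%%       if not sessions:
%%%           return 0.0
%%%       abandoned = sum(1 for s in sessions if "click" not in s)
%%%       return abandoned / len(sessions)
%%%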

@Article{Su:2014:HIY,
  author =       "Ao-Jan Su and Y. Charlie Hu and Aleksandar Kuzmanovic
                 and Cheng-Kok Koh",
  title =        "How to Improve Your Search Engine Ranking: Myths and
                 Reality",
  journal =      j-TWEB,
  volume =       "8",
  number =       "2",
  pages =        "8:1--8:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2579990",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Search engines have greatly influenced the way people
                 access information on the Internet, as such engines
                 provide the preferred entry point to billions of pages
                 on the Web. Therefore, highly ranked Web pages
                 generally have higher visibility to people, and pushing
                 the ranking higher has become the top priority for Web
                 masters. As a matter of fact, Search Engine
                 Optimization (SEO) has become a sizeable business that
                 attempts to improve its clients' rankings. Still, the
                 lack of ways to validate SEO methods has created
                 numerous myths and fallacies associated with ranking
                 algorithms. In this article, we focus on two ranking
                 algorithms, Google's and Bing's, and design, implement,
                 and evaluate a ranking system to systematically
                 validate assumptions others have made about these
                 popular ranking algorithms. We demonstrate that linear
                 learning models, coupled with a recursive partitioning
                 ranking scheme, are capable of predicting ranking
                 results with high accuracy. As an example, we manage to
                 correctly predict 7 out of the top 10 pages for 78\% of
                 evaluated keywords. Moreover, for content-only ranking,
                 our system can correctly predict 9 or more pages out of
                 the top 10 ones for 77\% of search terms. We show how
                 our ranking system can be used to reveal the relative
                 importance of ranking features in a search engine's
                 ranking function, provide guidelines for SEOs and Web
                 masters to optimize their Web pages, validate or
                 disprove new ranking features, and evaluate search
                 engine ranking results for possible ranking bias.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
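%%%
%%% The Su et al. entry above predicts search rankings with linear
%%% models and reports how many of the top 10 pages are predicted
%%% correctly. Two small Python helpers matching those ingredients (a
%%% hypothetical feature/weight encoding, not the article's trained
%%% model):
%%%
%%%   def linear_score(features, weights):
%%%       """Linear ranking model: score = sum of weight * feature."""
%%%       return sum(weights.get(f, 0.0) * x for f, x in features.items())
%%%
%%%   def top_k_overlap(predicted, actual, k=10):
%%%       """How many of the actual top-k pages also appear in the
%%%       predicted top k (e.g., 7 of 10)."""
%%%       return len(set(predicted[:k]) & set(actual[:k]))
%%%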

@Article{Sirivianos:2014:LSF,
  author =       "Michael Sirivianos and Kyungbaek Kim and Jian Wei Gan
                 and Xiaowei Yang",
  title =        "Leveraging Social Feedback to Verify Online Identity
                 Claims",
  journal =      j-TWEB,
  volume =       "8",
  number =       "2",
  pages =        "9:1--9:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2543711",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Anonymity is one of the main virtues of the Internet,
                 as it protects privacy and enables users to express
                 opinions more freely. However, anonymity hinders the
                 assessment of the veracity of assertions that online
                 users make about their identity attributes, such as age
                 or profession. We propose FaceTrust, a system that uses
                 online social networks to provide lightweight identity
                 credentials while preserving a user's anonymity.
                 FaceTrust employs a ``game with a purpose'' design to
                 elicit the opinions of the friends of a user about the
                 user's self-claimed identity attributes, and uses
                 attack-resistant trust inference to assign veracity
                 scores to identity attribute assertions. FaceTrust
                 provides credentials, which a user can use to
                 corroborate his assertions. We evaluate our proposal
                 using a live Facebook deployment and simulations on a
                 crawled social graph. The results show that our
                 veracity scores are strongly correlated with the ground
                 truth, even when dishonest users make up a large
                 fraction of the social network and employ the Sybil
                 attack.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Pugliese:2014:EMM,
  author =       "Andrea Pugliese and Matthias Br{\"o}cheler and V. S.
                 Subrahmanian and Michael Ovelg{\"o}nne",
  title =        "Efficient {MultiView} Maintenance under Insertion in
                 Huge Social Networks",
  journal =      j-TWEB,
  volume =       "8",
  number =       "2",
  pages =        "10:1--10:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2541290",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Applications to monitor various aspects of social
                 networks are becoming increasingly popular. For
                 instance, marketers want to look for semantic patterns
                 in the content of tweets and Facebook posts relating
                 to their products. Law enforcement agencies
                 want to track behaviors involving potential criminals
                 on the Internet by looking for certain patterns of
                 behavior. Music companies want to track patterns of
                 spread of illegal music. These applications allow
                 multiple users to specify patterns of interest and
                 monitor them in real time as new data gets added to the
                 Web or to a social network. In this article we develop
                 the concept of social network view servers in which all
                 of these types of applications can be simultaneously
                 monitored. The patterns of interest are expressed as
                 views over an underlying graph or social network
                 database. We show that a given set of views can be
                 compiled in multiple possible ways to take advantage of
                 common substructures and define the concept of an
                 optimal merge. Though finding an optimal merge is shown
                 to be NP-hard, we develop the AddView algorithm to find
                 very good merges quickly. We develop a very fast MultiView
                 algorithm that scalably and efficiently maintains
                 multiple subgraph views when insertions are made to the
                 social network database. We show that our algorithm is
                 correct, study its complexity, and experimentally
                 demonstrate that our algorithm can scalably handle
                 updates to hundreds of views on 6 real-world social
                 network databases with up to 540M edges.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bislimovska:2014:TCB,
  author =       "Bojana Bislimovska and Alessandro Bozzon and Marco
                 Brambilla and Piero Fraternali",
  title =        "Textual and Content-Based Search in Repositories of
                 {Web} Application Models",
  journal =      j-TWEB,
  volume =       "8",
  number =       "2",
  pages =        "11:1--11:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2579991",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Model-driven engineering relies on collections of
                 models, which are the primary artifacts for software
                 development. To enable knowledge sharing and reuse,
                 models need to be managed within repositories, where
                 they can be retrieved upon users' queries. This article
                 examines two different techniques for indexing and
                 searching model repositories, with a focus on Web
                 development projects encoded in a domain-specific
                 language. Keyword-based and content-based search (also
                 known as query-by-example) are contrasted with respect
                 to the architecture of the system, the processing of
                 models and queries, and the way in which metamodel
                 knowledge can be exploited to improve search. A
                 thorough experimental evaluation is conducted to
                 examine what parameter configurations lead to better
                 accuracy and to offer insight into which queries are
                 addressed best by each system.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Bellogin:2014:NSW,
  author =       "Alejandro Bellog{\'\i}n and Pablo Castells and
                 Iv{\'a}n Cantador",
  title =        "Neighbor Selection and Weighting in User-Based
                 Collaborative Filtering: a Performance Prediction
                 Approach",
  journal =      j-TWEB,
  volume =       "8",
  number =       "2",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2579993",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Tue Apr 1 05:42:19 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "User-based collaborative filtering systems suggest
                 interesting items to a user relying on similar-minded
                 people called neighbors. The selection and weighting of
                 these neighbors characterize the different
                 recommendation approaches. While standard strategies
                 perform a neighbor selection based on user
                 similarities, trust-aware recommendation algorithms
                 rely on other aspects indicative of user trust and
                 reliability. In this article we restate the trust-aware
                 recommendation problem, generalizing it in terms of
                 performance prediction techniques, whose goal is to
                 predict the performance of an information retrieval
                 system in response to a particular query. We
                 investigate how to adopt the preceding generalization
                 to define a unified framework where we conduct an
                 objective analysis of the effectiveness (predictive
                 power) of neighbor scoring functions. The proposed
                 framework enables discriminating whether recommendation
                 performance improvements are caused by the neighbor
                 scoring functions used or by the ways these
                 functions are used in the recommendation computation.
                 We evaluated our approach with several state-of-the-art
                 and novel neighbor scoring functions on three publicly
                 available datasets. By empirically comparing four
                 neighbor quality metrics and thirteen performance
                 predictors, we found strong predictive power for some
                 of the predictors with respect to certain metrics. This
                 result was then validated by checking the final
                 performance of recommendation strategies where
                 predictors are used for selecting and/or weighting user
                 neighbors. As a result, we have found that, by
                 measuring the predictive power of neighbor performance
                 predictors, we are able to anticipate which predictors
                 are going to perform better in neighbor-scoring-powered
                 versions of a user-based collaborative filtering
                 algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
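%%%
%%% The Bellogin et al. entry above plugs neighbor scoring functions
%%% into user-based collaborative filtering. A minimal Python
%%% prediction step where the scoring function is a parameter, so a
%%% similarity, a trust score, or a performance predictor can be
%%% swapped in (data layout assumed: user -> {item: rating}):
%%%
%%%   def predict_rating(user, item, ratings, weight):
%%%       """Weighted average of the ratings of neighbors who rated
%%%       the item; weight(user, v) is any neighbor scoring function."""
%%%       num = den = 0.0
%%%       for v, items in ratings.items():
%%%           if v != user and item in items:
%%%               w = weight(user, v)
%%%               num += w * items[item]
%%%               den += abs(w)
%%%       return num / den if den else None
%%%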

@Article{Qian:2014:FTD,
  author =       "Yi Qian and Sibel Adali",
  title =        "Foundations of Trust and Distrust in Networks:
                 Extended Structural Balance Theory",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2628438",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Modeling trust in very large social networks is a hard
                 problem due to the highly noisy nature of these
                 networks that span trust relationships from many
                 different contexts, based on judgments of reliability,
                 dependability, and competence. Furthermore,
                 relationships in these networks vary in their level of
                 strength. In this article, we introduce a novel
                 extension of structural balance theory as a
                 foundational theory of trust and distrust in networks.
                 Our theory preserves the distinctions between trust and
                 distrust as suggested in the literature, but also
                 incorporates the notion of relationship strength that
                 can be expressed as discrete categorical values, as
                 pairwise comparisons, or as metric distances. Our
                 model is novel, has sound social and psychological
                 basis, and captures the classical balance theory as a
                 special case. We then propose a convergence model,
                 describing how an imbalanced network evolves towards
                 new balance, and formulate the convergence problem of a
                 social network as a Metric Multidimensional Scaling
                 (MDS) optimization problem. Finally, we show how the
                 convergence model can be used to predict edge signs in
                 social networks and justify our theory through
                 extensive experiments on real datasets.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
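%%%
%%% The Qian and Adali entry above extends structural balance theory.
%%% In the classical theory their model generalizes, a triad is
%%% balanced iff the product of its edge signs is positive; a small
%%% Python check of that base case (edge signs only, without the
%%% article's relationship strengths):
%%%
%%%   from itertools import combinations
%%%
%%%   def imbalanced_triads(signs):
%%%       """signs: frozenset({u, v}) -> +1 (trust) or -1 (distrust).
%%%       Returns triads whose sign product is negative."""
%%%       nodes = set().union(*signs) if signs else set()
%%%       bad = []
%%%       for a, b, c in combinations(sorted(nodes), 3):
%%%           e = [frozenset(p) for p in ((a, b), (b, c), (a, c))]
%%%           if all(x in signs for x in e):
%%%               if signs[e[0]] * signs[e[1]] * signs[e[2]] < 0:
%%%                   bad.append((a, b, c))
%%%       return bad
%%%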

@Article{Soi:2014:CDC,
  author =       "Stefano Soi and Florian Daniel and Fabio Casati",
  title =        "Conceptual Development of Custom, Domain-Specific
                 Mashup Platforms",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2628439",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Despite the common claim by mashup platforms that they
                 enable end-users to develop their own software, in
                 practice end-users still don't develop their own
                 mashups, as the highly technical or inexistent [sic]
                 user bases of today's mashup platforms testify. The key
                 shortcoming of current platforms is their
                 general-purpose nature, that privileges expressive
                 power over intuitiveness. In our prior work, we have
                 demonstrated that a domain-specific mashup approach,
                 which privileges intuitiveness over expressive power,
                 has much more potential to enable end-user development
                 (EUD). The problem is that developing mashup
                 platforms, domain-specific or not, is complex and time
                 consuming. In addition, domain-specific mashup
                 platforms by their very nature target only a small user
                 base, that is, the experts of the target domain, which
                 makes their development not sustainable if it is not
                 adequately supported and automated. With this article,
                 we aim to make the development of custom,
                 domain-specific mashup platforms cost-effective. We
                 describe a mashup tool development kit (MDK) that is
                 able to automatically generate a mashup platform
                 (comprising custom mashup and component description
                 languages and design-time and runtime environments)
                 from a conceptual design and to provision it as a
                 service. We equip the kit with a dedicated development
                 methodology and demonstrate the applicability and
                 viability of the approach with the help of two case
                 studies.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Zhang:2014:PBT,
  author =       "Xianchao Zhang and You Wang and Nan Mou and Wenxin
                 Liang",
  title =        "Propagating Both Trust and Distrust with Target
                 Differentiation for Combating Link-Based {Web} Spam",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2628440",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Semi-automatic anti-spam algorithms propagate either
                 trust through links from a good seed set (e.g.,
                 TrustRank) or distrust through inverse links from a bad
                 seed set (e.g., Anti-TrustRank) to the entire Web.
                 These algorithms have shown their power in combating
                 link-based Web spam, since they integrate both human
                 judgement and machine intelligence. Nevertheless,
                 there is still much room for improvement. One issue of
                 most existing trust/distrust propagation algorithms is
                 that only trust or distrust is propagated and only a
                 good seed set or a bad seed set is used. According to
                 Wu et al. [2006a], a combined usage of both trust and
                 distrust propagation can lead to better results, and an
                 effective framework is needed to realize this insight.
                 Another more serious issue of existing algorithms is
                 that trust or distrust is propagated in nondifferential
                 ways, that is, a page propagates its trust or distrust
                 score uniformly to its neighbors, without considering
                 whether each neighbor should be trusted or distrusted.
                 Such blind propagation schemes are inconsistent with
                 the original intention of
                 trust/distrust propagation. However, it seems
                 impossible to implement differential propagation if
                 only trust or distrust is propagated. In this article,
                 we take the view that each Web page has both a
                 trustworthy side and an untrustworthy side, and we
                 thus assign two scores to each Web page: T-Rank,
                 scoring the trustworthiness of the page, and D-Rank,
                 scoring the untrustworthiness of the page. We then
                 propose an integrated framework that propagates both
                 trust and distrust. In the framework, the propagation
                 of T-Rank/D-Rank is penalized by the target's current
                 D-Rank/T-Rank. In other words, the propagation of
                 T-Rank/D-Rank is decided by the target's current
                 (generalized) probability of being
                 trustworthy/untrustworthy; thus a page propagates more
                 trust/distrust to a trustworthy/untrustworthy neighbor
                 than to an untrustworthy/trustworthy neighbor. In this
                 way, propagating both trust and distrust with target
                 differentiation is implemented. We use T-Rank scores to
                 realize spam demotion and D-Rank scores to accomplish
                 spam detection. The proposed Trust-DistrustRank (TDR)
                 algorithm regresses to TrustRank and Anti-TrustRank
                 when the penalty factor is set to 1 and 0,
                 respectively. Thus TDR could be seen as a combinatorial
                 generalization of both TrustRank and Anti-TrustRank.
                 TDR not only makes full use of both trust and distrust
                 propagation, but also overcomes the disadvantages of
                 both TrustRank and Anti-TrustRank. Experimental results
                 on benchmark datasets show that TDR outperforms other
                 semi-automatic anti-spam algorithms for both spam
                 demotion and spam detection tasks under various
                 criteria.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
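
%%% The abstract of Zhang:2014:PBT above describes propagating trust
%%% and distrust simultaneously, with each score's flow to a target
%%% penalized by the target's opposite score.  A loose Python sketch
%%% of that idea follows; the update rules, the normalization, and
%%% the wiring of the penalty factor beta are guesses made for
%%% illustration only, and in particular this sketch does not
%%% reproduce the paper's property that beta = 1 and beta = 0 recover
%%% TrustRank and Anti-TrustRank exactly (see DOI 10.1145/2628440 for
%%% the real algorithm).
%%%
%%%   import numpy as np
%%%
%%%   def tdr_sketch(A, good_seeds, bad_seeds,
%%%                  beta=0.5, d=0.85, iters=50):
%%%       """A[i, j] = 1 if page i links to page j."""
%%%       n = A.shape[0]
%%%       # Row-normalize by out-degree (trust flows along links) and
%%%       # column-normalize by in-degree (distrust flows along
%%%       # inverse links).
%%%       fwd = A / np.maximum(A.sum(axis=1, keepdims=True), 1)
%%%       inv = A / np.maximum(A.sum(axis=0, keepdims=True), 1)
%%%       t = np.zeros(n); t[list(good_seeds)] = 1 / len(good_seeds)
%%%       s = np.zeros(n); s[list(bad_seeds)] = 1 / len(bad_seeds)
%%%       T, D = t.copy(), s.copy()
%%%       for _ in range(iters):
%%%           # Target differentiation: a target absorbs less trust
%%%           # the higher its current D-Rank, and less distrust the
%%%           # higher its current T-Rank.
%%%           T_new = (1 - d) * t + d * fwd.T.dot(T) * (1 - beta * D)
%%%           D_new = (1 - d) * s + \
%%%                   d * inv.dot(D) * (1 - (1 - beta) * T)
%%%           T, D = np.clip(T_new, 0, 1), np.clip(D_new, 0, 1)
%%%       # High T-Rank: demote less; high D-Rank: flag as spam.
%%%       return T, D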

@Article{Margaritis:2014:ITI,
  author =       "Giorgos Margaritis and Stergios V. Anastasiadis",
  title =        "Incremental Text Indexing for Fast Disk-Based Search",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2560800",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "Real-time search requires a system to incrementally
                 ingest content updates and make them searchable
                 almost immediately, while serving search queries at
                 low latency.
                 This is currently feasible for datasets of moderate
                 size by fully maintaining the index in the main memory
                 of multiple machines. In contrast, disk-based
                 methods for incremental index maintenance
                 substantially increase search latency because the
                 index becomes fragmented across multiple disk
                 locations. To support fast search
                 over disk-based storage, we take a fresh look at
                 incremental text indexing in the context of current
                 architectural features. We introduce a greedy method
                 called Selective Range Flush (SRF) to contiguously
                 organize the index over disk blocks and dynamically
                 update it at low cost. We show that SRF requires
                 substantial experimental effort to tune specific
                 parameters for performance efficiency. Subsequently, we
                 propose the Unified Range Flush (URF) method, which is
                 conceptually simpler than SRF, achieves similar or
                 better performance with fewer parameters and less
                 tuning, and is amenable to I/O complexity analysis. We
                 implement interesting variations of the two methods in
                 the Proteus prototype search engine that we developed
                 and conduct extensive experiments with three
                 different Web datasets of size up to 1 TB. Across
                 different systems, we show that our methods offer
                 search latency that matches, or reduces by up to
                 half, the lowest achieved by existing disk-based
                 methods. In comparison to an
                 existing method of comparable search latency on the
                 same system, our methods reduce by a factor of 2.0--2.4
                 the I/O part of build time and by 21--24\% the total
                 build time.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
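
%%% The abstract of Margaritis:2014:ITI above turns on one idea:
%%% buffer postings in memory, keep each term range contiguous on
%%% disk, and, when memory fills, greedily flush the range that frees
%%% the most memory.  The toy Python sketch below illustrates only
%%% that flush policy; the real SRF/URF methods manage raw disk
%%% blocks, distinguish short from long posting lists, and use
%%% lexicographic term ranges, so every detail here (hashed ranges,
%%% dict-backed ``disk'', the memory accounting) is invented for
%%% exposition.
%%%
%%%   from collections import defaultdict
%%%
%%%   class RangeFlushIndex:
%%%       def __init__(self, memory_limit=1000, nranges=16):
%%%           self.memory_limit = memory_limit
%%%           self.nranges = nranges
%%%           # Buffered postings per range, plus their "on-disk" runs.
%%%           self.mem = [defaultdict(list) for _ in range(nranges)]
%%%           self.disk = [defaultdict(list) for _ in range(nranges)]
%%%           self.buffered = 0
%%%
%%%       def _range_of(self, term):
%%%           # Stand-in for the paper's lexicographic term ranges.
%%%           return hash(term) % self.nranges
%%%
%%%       def add(self, doc_id, terms):
%%%           for t in terms:
%%%               self.mem[self._range_of(t)][t].append(doc_id)
%%%               self.buffered += 1
%%%           while self.buffered > self.memory_limit:
%%%               self._flush_fullest_range()
%%%
%%%       def _flush_fullest_range(self):
%%%           # Greedy choice: evict the range holding the most
%%%           # buffered postings, merging it into its contiguous
%%%           # on-disk run with one sequential write.
%%%           r = max(range(self.nranges),
%%%                   key=lambda i: sum(len(p)
%%%                                     for p in self.mem[i].values()))
%%%           for term, postings in self.mem[r].items():
%%%               self.disk[r][term].extend(postings)
%%%           self.buffered -= sum(len(p)
%%%                                for p in self.mem[r].values())
%%%           self.mem[r].clear()
%%%
%%%       def search(self, term):
%%%           # A query merges the on-disk run with any postings for
%%%           # the term still buffered in memory.
%%%           r = self._range_of(term)
%%%           return self.disk[r][term] + self.mem[r][term]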

@Article{Siersdorfer:2014:AMC,
  author =       "Stefan Siersdorfer and Sergiu Chelaru and Jose {San
                 Pedro} and Ismail Sengor Altingovde and Wolfgang
                 Nejdl",
  title =        "Analyzing and Mining Comments and Comment Ratings on
                 the Social {Web}",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "17:1--17:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2628441",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "An analysis of the social video sharing platform
                 YouTube and the news aggregator Yahoo! News reveals the
                 presence of vast amounts of community feedback through
                 comments for published videos and news stories, as well
                 as through metaratings for these comments. This article
                 presents an in-depth study of commenting and comment
                 rating behavior on a sample of more than 10 million
                 user comments on YouTube and Yahoo! News. In this
                 study, comment ratings are considered first-class
                 citizens. Their dependencies on textual content,
                 thread structure of comments, and associated content
                 (e.g., videos and their metadata) are analyzed to
                 obtain a comprehensive understanding of the community
                 commenting behavior. Furthermore, this article explores
                 the applicability of machine learning and data mining
                 to detect acceptance of comments by the community,
                 comments likely to trigger discussions, controversial
                 and polarizing content, and users exhibiting offensive
                 commenting behavior. Results from this study have
                 potential application in guiding the design of
                 community-oriented online discussion platforms.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
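
%%% The abstract of Siersdorfer:2014:AMC above mentions applying
%%% machine learning to detect community acceptance of comments,
%%% where acceptance is derived from comment ratings.  A minimal
%%% stand-in using scikit-learn; the toy comments, ratings,
%%% binarization threshold, and model choice are placeholders, not
%%% the paper's experimental setup.
%%%
%%%   from sklearn.feature_extraction.text import TfidfVectorizer
%%%   from sklearn.linear_model import LogisticRegression
%%%   from sklearn.pipeline import make_pipeline
%%%
%%%   comments = ["great explanation, thanks!",
%%%               "this is totally wrong",
%%%               "first!!!",
%%%               "the sources cited here are solid"]
%%%   ratings = [12, -7, -15, 9]   # hypothetical up- minus down-votes
%%%   accepted = [r > 0 for r in ratings]  # community-accepted?
%%%
%%%   # Predict acceptance of unseen comments from their text alone.
%%%   model = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)),
%%%                         LogisticRegression())
%%%   model.fit(comments, accepted)
%%%   print(model.predict(["wrong wrong wrong"]))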

@Article{Casteleyn:2014:TYR,
  author =       "Sven Casteleyn and Irene Garrig{\'o}s and
                 Jose-Norberto Maz{\'o}n",
  title =        "Ten Years of {Rich Internet Applications}: a
                 Systematic Mapping Study, and Beyond",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "18:1--18:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2626369",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "The term Rich Internet Applications (RIAs) is
                 generally associated with Web applications that provide
                 the features and functionality of traditional desktop
                 applications. Ten years after the introduction of the
                 term, a substantial amount of research has been
                 carried out
                 to study various aspects of RIAs. It has thus become
                 essential to summarize this research and provide an
                 adequate overview. OBJECTIVE. The objective of our
                 study is to assemble, classify, and analyze all RIA
                 research performed in the scientific community, thus
                 providing a consolidated overview thereof, and to
                 identify well-established topics, trends, and open
                 research issues. Additionally, we provide a qualitative
                 discussion of the most interesting findings. This work
                 therefore serves as a reference work for beginning and
                 established RIA researchers alike, as well as for
                 industrial actors that need an introduction to the
                 field, or seek pointers to (a specific subset of) the
                 state-of-the-art. METHOD. A systematic mapping study is
                 performed to identify all RIA-related
                 publications, define a classification scheme, and
                 categorize, analyze, and discuss the identified
                 research according to it. RESULTS. Our source
                 identification phase resulted in 133 relevant,
                 peer-reviewed publications, published between 2002 and
                 2011 in a wide variety of venues. They were
                 subsequently classified according to four facets:
                 development activity, research topic, contribution
                 type, and research type. Pie, stacked bar, and bubble
                 charts were used to depict and analyze the results. A
                 deeper analysis is provided for the most interesting
                 and/or remarkable results. CONCLUSION. Analysis of the
                 results shows that, although the RIA term was coined in
                 2002, the first RIA-related research appeared in 2004.
                 From 2007 there was a significant increase in research
                 activity, peaking in 2009 and decreasing to pre-2009
                 levels afterwards. All development phases are covered
                 in the identified research, with emphasis on ``design''
                 (33\%) and ``implementation'' (29\%). The majority of
                 research proposes a ``method'' (44\%), followed by
                 ``model'' (22\%), ``methodology'' (18\%), and ``tools''
                 (16\%); no publications in the category ``metrics''
                 were found. The preponderant research topic is
                 ``models, methods and methodologies'' (23\%) and, to a
                 lesser extent, ``usability and accessibility'' and
                 ``user interface'' (11\% each). On the other hand, the
                 topic ``localization, internationalization and
                 multilinguality'' received no attention at all, and
                 topics such as ``deep Web'' (under 1\%), ``business
                 processing'', ``usage analysis'', ``data management'',
                 ``quality and metrics'' (all under 2\%), ``semantics'',
                 and ``performance'' (slightly above 2\%) received very
                 little attention. Finally, there is a large majority of
                 ``solution proposals'' (66\%), fewer ``evaluation
                 research'' papers (14\%), and still fewer
                 ``validation'' papers (6\%),
                 although the latter have been increasing in recent
                 years.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}

@Article{Dincturk:2014:MBA,
  author =       "Mustafa Emre Dincturk and Guy-Vincent Jourdan and
                 Gregor V. Bochmann and Iosif Viorel Onut",
  title =        "A Model-Based Approach for Crawling {Rich Internet
                 Applications}",
  journal =      j-TWEB,
  volume =       "8",
  number =       "3",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2626371",
  ISSN =         "1559-1131 (print), 1559-114X (electronic)",
  ISSN-L =       "1559-1131",
  bibdate =      "Wed Jul 2 18:17:48 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tweb.bib",
  abstract =     "New Web technologies, like AJAX, result in more
                 responsive and interactive Web applications, sometimes
                 called Rich Internet Applications (RIAs). Crawling
                 techniques developed for traditional Web applications
                 are not sufficient for crawling RIAs. The inability to
                 crawl RIAs is a problem that needs to be addressed,
                 if only to make RIAs searchable and testable. We
                 present a new methodology, called ``model-based
                 crawling'', that can be used as a basis to design
                 efficient crawling strategies for RIAs. We illustrate
                 model-based crawling with a sample strategy, called the
                 ``hypercube strategy''. The performance of our
                 model-based crawling strategies is compared against
                 existing standard crawling strategies, including
                 breadth-first, depth-first, and a greedy strategy.
                 Experimental results show that our model-based crawling
                 approach is significantly more efficient than these
                 standard strategies.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on the Web (TWEB)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1062",
}
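
%%% Crawling a RIA, as described in Dincturk:2014:MBA above, means
%%% discovering client-side states reached by executing events
%%% (clicks, AJAX calls) rather than by following URLs.  The sketch
%%% below shows only the standard breadth-first baseline that the
%%% paper's model-based strategies are compared against; the
%%% hypercube strategy itself is not reproduced, and events_of and
%%% execute are hypothetical hooks into the application under crawl.
%%%
%%%   from collections import deque
%%%
%%%   def crawl_states_bfs(initial_state, events_of, execute):
%%%       """events_of(s): events enabled in state s;
%%%          execute(s, e): state reached by firing e in s
%%%          (states must be hashable, e.g., DOM hashes)."""
%%%       seen = {initial_state}
%%%       frontier = deque([initial_state])
%%%       transitions = []
%%%       while frontier:
%%%           s = frontier.popleft()
%%%           for e in events_of(s):
%%%               # In a real crawler, reaching s again to fire e may
%%%               # require replaying a path from the initial state,
%%%               # which is the cost good strategies try to minimize.
%%%               t = execute(s, e)
%%%               transitions.append((s, e, t))
%%%               if t not in seen:
%%%                   seen.add(t)
%%%                   frontier.append(t)
%%%       return seen, transitions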