% reviewed.bib

@inproceedings{psc,
author = {Ellis Fenske* and Akshaya Mani* and Aaron Johnson and Micah Sherr},
booktitle = {{ACM Conference on Computer and Communications Security (CCS)}},
month = {November},
documenturl = {papers/psc.pdf},
note = {(* co-first authors)},
title = {{Distributed Measurement with Private Set-Union Cardinality}},
year = {2017},
abstract = {This paper introduces a cryptographic protocol for efficiently
aggregating a count of unique items across a set of data parties
privately --- that is, without exposing any information other than
the count.  Our protocol allows for more secure and useful
statistics gathering in privacy-preserving distributed systems such
as anonymity networks; for example, it allows operators of anonymity
networks such as Tor to securely answer the questions: {\em how many
unique users are using the distributed service?} and {\em how many
hidden services are being accessed?}  We formally prove the
correctness and security of our protocol in the Universal
Composability framework against an active adversary that compromises
all but one of the aggregation parties. We also show that the
protocol provides security against adaptive corruption of the data
parties, which prevents them from being victims of targeted
compromise. To ensure {\em safe} measurements, we also show how the
output can satisfy differential privacy.

We present a proof-of-concept implementation of the private
set-union cardinality protocol (PSC) and use it to demonstrate that
PSC operates with low computational overhead and reasonable
bandwidth. In particular, for reasonable deployment sizes, the
protocol runs at timescales smaller than the typical measurement
period would be and thus is suitable for distributed measurement.}
}

@article{privacy-netprov,
author = {Yuankai Zhang and Adam O'Neill and Micah Sherr and Wenchao Zhou},
journal = {{Proceedings of the VLDB Endowment (PVLDB)}},
title = {{Privacy-preserving Network Provenance}},
volume = {10},
documenturl = {papers/privpres-netprov.pdf},
month = {August},
year = {2017},
abstract = {Network accountability, forensic analysis, and
failure diagnosis are becoming increasingly
important for network management and security.  {\em
Network provenance} significantly aids these tasks by explaining network
behavior and revealing the dependencies between
system states.  Although resourceful, network
provenance can sometimes be too rich, revealing
potentially sensitive information that was involved
in system execution.

In this paper, we propose a
cryptographic approach to preserve the
confidentiality of provenance (sub)graphs while
allowing users to query and access the parts of the
graph for which they are authorized. Our proposed
solution is a novel application of searchable
symmetric encryption (SSE) and more generally
structured encryption (SE).  Our SE-enabled
provenance system allows a node to enforce access
control policies over its provenance data even after
the data has been shipped to remote nodes
(\emph{e.g.}, for optimization purposes).  We
present a prototype of our design and demonstrate
its practicality, scalability, and efficiency for
both provenance maintenance and querying.}
}

@inproceedings{dedos-demo,
author = {Henri Maxime Demoulin* and Tavish Vaidya* and Isaac Pedisich and Nik Sultana and Yuankai Zhang and Ang Chen and Andreas Haeberlen and Boon Thau Loo and Linh Thi Xuan Phan and Micah Sherr and Clay Shields and Wenchao Zhou},
booktitle = {Conference on Applications, Technologies, Architectures, and Protocols for Computer Communications (SIGCOMM)},
month = {August},
title = {{A Demonstration of the DeDoS Platform for Defusing Asymmetric DDoS Attacks in Data Centers (Demo)}},
note = {(* co-first authors)},
documenturl = {papers/dedos-sigcomm-demo.pdf},
year = {2017}
}

@inproceedings{eavesdropper-traps,
author = {Tavish Vaidya and Eric Burger and Micah Sherr and Clay Shields},
booktitle = {USENIX Workshop on Cyber Security Experimentation and Test (CSET)},
month = {August},
title = {{Where art thou, Eve? Experiences Laying Traps for Internet Eavesdroppers}},
year = {2017},
documenturl = {papers/whereartthou.pdf},
abstract = {This paper describes a set of experiments we conducted
to answer the question: {\em just how prevalent is
Internet interception?}  That is, if we sent our most sensitive
information (bank information, passwords, etc.) in the clear, should
we expect to regret it?

For a little over a year, we sent different types of Internet traffic over
unencrypted channels between multiple clients and servers located at
geographically diverse locations around the globe.  Our messages
contained seemingly sensitive and valuable information, including login
credentials.

In total, we found {\em no} instances in which our information was
acted upon by an eavesdropper.

This paper details the numerous challenges---technical, legal, and
ethical---of setting up and maintaining a year-long, large-scale
honeytrap.  We discuss some fundamental limitations of such an
experiment, and argue why our results should not be misinterpreted
to suggest that message encryption is gratuitous.}
}

@inproceedings{histore,
author = {Akshaya Mani and Micah Sherr},
booktitle = {Network and Distributed System Security Symposium (NDSS)},
month = {February},
title = {{Histor$\epsilon$: Differentially Private and Robust Statistics Collection for Tor}},
year = {2017},
documenturl = {papers/histore.pdf},
abstract = {A large volume of existing research attempts to understand who uses Tor and how the network is used (and misused).  However, conducting measurements on the live Tor network, if done improperly, can endanger the security and anonymity of the millions of users who depend on the network to enhance their online privacy.  Indeed, several existing measurement studies of Tor have been heavily criticized for unsafe research practices.

Tor needs privacy-preserving methods of gathering statistics.  The recently proposed PrivEx system demonstrates how data can be safely collected on Tor using techniques from differential privacy.   However, as we demonstrate in this paper, the integrity of the statistics reported by PrivEx is brittle under realistic deployment conditions.  An adversary who operates even a single relay in the volunteer-operated anonymity network can arbitrarily influence the result of PrivEx queries.  We argue that a safe and {\em useful} data collection mechanism must provide both privacy and integrity protections.

This paper presents HisTor$\epsilon$, a privacy-preserving statistics collection scheme based on $(\epsilon,\delta)$-differential privacy that is robust against adversarial manipulation.  We formalize the security guarantees of HisTor$\epsilon$ and show using historical data from the Tor Project that HisTor$\epsilon$ provides useful data collection and reporting with low bandwidth and processing overheads.}
}

@inproceedings{splitstack-hotnets,
author = {Ang Chen and Akshay Sriraman and Tavish Vaidya and Yuankai Zhang and Andreas Haeberlen and Boon Thau Loo and Linh Thi Xuan Phan and Micah Sherr and Clay Shields and Wenchao Zhou},
year = {2016},
booktitle = {ACM Workshop on Hot Topics in Networks (HotNets)},
month = {November},
title = {{Dispersing Asymmetric DDoS Attacks with SplitStack}},
documenturl = {papers/splitstack.pdf},
abstract = {This paper presents SplitStack, an architecture targeted at
mitigating asymmetric DDoS attacks. These attacks are particularly
challenging, since attackers can use a limited amount of resources to
trigger exhaustion of a particular type of system resource on the
server side. SplitStack resolves this by splitting the monolithic
stack into many separable components called minimum splittable units
(MSUs). If part of the application stack is experiencing a DDoS
attack, SplitStack massively \textit{replicates just the affected MSUs},
potentially across many machines. This allows scaling of
the impacted resource separately from the rest of the application
stack, so that resources can be precisely added where needed to
combat the attack. We validate SplitStack via a preliminary
case study, and show that it outperforms naive replication in
defending against asymmetric attacks.}
}

@inproceedings{resilient-opaque-constructs,
author = {Brendan Sheridan and Micah Sherr},
booktitle = {European Symposium on Research in Computer Security (ESORICS)},
month = {September},
title = {{On Manufacturing Resilient Opaque Constructs Against Static Analysis}},
year = {2016},
documenturl = {papers/opaque.pdf},
abstract = {Opaque constructs have developed into a commonly used primitive in
obfuscation, watermarking, and tamper-proofing schemes. However, most
prior work has based the resilience of these primitives on a poorly
defined reduction to a known $\mathcal{NP}$-complete problem. There
has been little scrutiny of the adversarial model and little
discussion of how to generate instances that are always hard. In this
paper, we offer what we believe to be the first complete algorithm for
generating {\em resilient} opaque constructs against static analysis.
We base their resilience on the complexity of 3SAT instances with $cn$
clauses for $c=6$ and $n$ distinct variables.  We draw on existing
theoretical bounds to show that these instances always require
exponential time to defeat under formal notions of resolution
complexity.

This paper also explores in-depth the security of opaque constructs in
real-world settings.  We argue that the common theoretical model used
in prior work (as well as our resilient opaque construction scheme) is
too optimistic. It does not offer practical obfuscation against an
adversary who tolerates some small false positive rate.  We offer a
heuristic-based attack to demonstrate this issue.  Our results suggest
that opaque constructs should be viewed with a high degree of
skepticism until they can be proven secure under more useful
theoretical models.}
}

@inproceedings{hidden-voice-commands,
author = {Nicholas Carlini and Pratyush Mishra and Tavish Vaidya and Yuankai Zhang and Micah Sherr and Clay Shields and David Wagner and Wenchao Zhou},
booktitle = {USENIX Security Symposium (Security)},
month = {August},
title = {{Hidden Voice Commands}},
year = {2016},
documenturl = {papers/voicecommands.pdf},
abstract = {Voice interfaces are becoming more ubiquitous and are now the
primary input method for many devices. We explore in this paper how
they can be attacked with {\em hidden voice commands} that
are unintelligible to human listeners but which are interpreted as
commands by devices.

We evaluate these attacks under two different threat models.
In the black-box model, an attacker uses the speech recognition
system as an opaque oracle.  We show that the adversary can
produce difficult to understand commands that are effective against existing
systems in the black-box model. Under the white-box model, the attacker has full knowledge
of the internals of the speech recognition system and uses
it to create attack commands that we demonstrate through user
testing are not understandable by humans.

We then evaluate several defenses, including notifying the user when a
voice command is accepted; a verbal challenge-response protocol; and a
machine learning approach that can detect our attacks with
99.8\% accuracy.}
}

@article{tor-dataplane-defenses,
author = {Henry Tan and Micah Sherr and Wenchao Zhou},
journal = {Proceedings on Privacy Enhancing Technologies (PoPETS)},
title = {{Data-plane Defenses against Routing Attacks on Tor}},
month = {July},
documenturl = {papers/dataplane-defenses.pdf},
year = {2016},
abstract = {Tor is susceptible to traffic correlation attacks in which an adversary who observes flows entering and leaving the anonymity network can apply statistical techniques to correlate flows and de-anonymize their endpoints.  While an adversary may not be naturally positioned to conduct such attacks, a recent study shows that the Internet's control-plane can be manipulated to increase an adversary's view of the network, and consequently, improve its ability to perform traffic correlation.

This paper explores, in-depth, the effects of control-plane attacks on the security of the Tor network.  Using accurate models of the live Tor network, we quantify Tor's susceptibility to these attacks by measuring the fraction of the Tor network that is vulnerable and the advantage to the adversary of performing the attacks. We further propose defense mechanisms that protect Tor users from manipulations at the control-plane.  Perhaps surprisingly, we show that by leveraging existing trust anchors in Tor, defenses deployed only in the data-plane are sufficient to detect most control-plane attacks. Our defenses do not assume the active participation of Internet Service Providers, and require only very small changes to Tor.  We show that our defenses result in a more than tenfold decrease in the effectiveness of certain control-plane attacks.}
}

@article{mtor,
author = {Dong Lin and Micah Sherr and Boon Thau Loo},
journal = {Proceedings on Privacy Enhancing Technologies (PoPETS)},
title = {{Scalable and Anonymous Group Communication with MTor}},
month = {July},
documenturl = {papers/mtor.pdf},
year = {2016},
abstract = {This paper presents {\em MTor}, a low-latency
anonymous group communication system.  We construct
MTor as an extension to Tor, allowing the
construction of multi-source multicast trees on top
of the existing Tor infrastructure.  MTor does not
depend on an external service to broker the group
communication, and avoids central points of failure
and trust.  MTor's substantial bandwidth savings and
graceful scalability enable new classes of anonymous
applications that are currently too
bandwidth-intensive to be viable through traditional
unicast Tor communication---e.g., group file
transfer, collaborative editing, streaming video,
and real-time audio conferencing.

We detail the
design of MTor and then analyze its performance and
anonymity.  By simulating MTor in Shadow and TorPS
using realistic models of the live Tor network's
topology and recent consensus records from the live
Tor network, we show that MTor achieves a 29\%
savings in network bandwidth and a 73\% reduction in
transmission time as compared to the baseline
approach for anonymous group communication among 20
group members.  We also demonstrate that MTor scales
gracefully with the number of group participants,
and allows dynamic group composition over time.
Importantly, as more Tor users switch to group
communication, we show that the overall performance
and utilization for group communication
improves. Finally, we discuss the anonymity
implications of MTor and measure its resistance to
traffic correlation.}
}

@inproceedings{cocaine-noodles,
author = {Tavish Vaidya and Yuankai Zhang and Micah Sherr and Clay Shields},
booktitle = {USENIX Workshop on Offensive Technologies (WOOT)},
month = {August},
title = {{Cocaine Noodles: Exploiting the Gap between Human and Machine Speech Recognition}},
year = {2015},
abstract = {Hands-free, voice-driven user input is gaining popularity, in part due to the increasing functionalities provided by intelligent digital assistances such as Siri, Cortana, and Google Now, and in part due to the proliferation of small devices that do not support more traditional, keyboard-based input.

In this paper, we examine the gap in the mechanisms of speech recognition between human and machine.  In particular, we ask the question, {\em do the differences in how humans and machines understand spoken speech lead to exploitable vulnerabilities?}  We find, perhaps surprisingly, that these differences can be easily exploited by an adversary to produce sound which is intelligible as a command to a computer speech recognition system but is not easily understandable by humans.  We discuss how a wide range of devices are vulnerable to such manipulation and describe how an attacker might use them to defraud victims or install malware, among other attacks.},
documenturl = {papers/cocaine-noodles.pdf}
}

@article{pvr-ton,
author = {Zhao, Mingchen and Zhou, Wenchao and Gurney, Alexander and Haeberlen, Andreas and Sherr, Micah and Loo, Boon Thau},
journal = {IEEE/ACM Transactions on Networking},
volume = {PP},
number = {99},
internal-note = {(review) volume {PP} / number 99 is the IEEE early-access placeholder -- replace with the final volume and issue once assigned},
title = {{Private and Verifiable Interdomain Routing Decisions}},
year = {2015},
abstract = {Existing secure interdomain routing protocols can verify
properties of route announcements, such as whether they correspond to a real network path.
It is often useful to verify more complex properties relating to the
route decision procedure -- for example,
whether the chosen route was the best one available,
or whether it was consistent with the network's peering agreements.
However, this is difficult to do without knowing a
network's routing policy and full routing state,
which are not normally disclosed.

In this paper, we show how a network can allow its peers to verify
a number of nontrivial properties of its interdomain routing decisions
\emph{without} revealing any additional information. If all the
properties hold, the peers learn nothing beyond what the interdomain
routing protocol already reveals; if a property does not hold, at
least one peer can detect this and prove the violation. We present
SPIDeR, a practical system that applies this approach to the
Border Gateway Protocol, and we report results from an experimental
evaluation to demonstrate that SPIDeR has a reasonable overhead.
}
}

@inproceedings{pie-detection,
author = {Lisa Singh and Grace Hui Yang and Micah Sherr and Andrew Hian-Cheong and Kevin Tian and Janet Zhu and Sicong Zhang},
booktitle = {IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)},
title = {{Public Information Exposure Detection: Helping Users Understand Their Web Footprints}},
month = {August},
year = {2015},
documenturl = {http://infosense.cs.georgetown.edu/publication/asonam2015-final.pdf},
abstract = {To help users better understand the potential risks
associated with publishing data publicly, as well as
the quantity and sensitivity of information that can
be obtained by combining data from various online
sources, we introduce a novel information exposure
detection framework that generates and analyzes the
web footprints users leave across the social web.
Web footprints are the traces of one's online social
activities represented by a set of attributes that
are known or can be inferred with high probability
about a user from his/her public profiles. Our framework
employs new probabilistic operators, novel
pattern-based attribute extraction from text, and a
population-based inference engine to generate web
footprints. Using a web footprint, the framework
then quantifies a user's level of information
exposure relative to others with similar traits, as
well as with regard to others in the
population. Evaluation over public profiles from
Twitter shows that the proposed framework
effectively detects and quantifies information
exposure using a small amount of initial knowledge.
}
}

@inproceedings{geofence-drones,
author = {Tavish Vaidya and Micah Sherr},
booktitle = {Security Protocols Workshop},
month = {March},
title = {{Mind your $(R, \Phi)$s: Location-Based Privacy Controls for Consumer Drones}},
year = {2015},
abstract = {This position paper explores the threat to individual privacy due to the widespread use of consumer drones. Present day consumer drones are equipped with sensors such as cameras and microphones, and their types and numbers can be well expected to increase in future.  Drone operators have absolute control on where the drones fly and what the on-board sensors record with no options for bystanders to protect their privacy.  This position paper proposes a policy language that allows homeowners, businesses, governments, and privacy-conscious individuals to specify location access-control for drones, and discusses how these policy-based controls might be realized in practice.

This position paper also explores the potential future problem of managing consumer drone traffic that is likely to emerge with increasing use of consumer drones for various tasks.
It proposes a privacy preserving traffic management protocol for directing drones towards their respective destinations without requiring drones to reveal their destinations.},
documenturl = {papers/priv-drones.pdf}
}

@inproceedings{muffler,
author = {W. Brad Moore and Henry Tan and Micah Sherr and Marcus A. Maloof},
booktitle = {Financial Cryptography and Data Security (FC)},
month = {January},
title = {{Multi-Class Traffic Morphing for Encrypted VoIP Communication}},
year = {2015},
abstract = {In a re-identification attack, an adversary analyzes the sizes of intercepted encrypted VoIP packets to infer characteristics of the underlying audio--for example, the language or individual phrases spoken on the encrypted VoIP call. Traffic morphing has been proposed as a general solution for defending against such attacks. In traffic morphing, the sender pads ciphertext to obfuscate the distribution of packet sizes, impairing the adversary's ability to accurately identify features of the plaintext.

This paper makes several contributions to traffic morphing defenses. First, we argue that existing traffic morphing techniques are ineffective against certain reidentification attacks since they (i) require a priori knowledge of what information the adversary is trying to learn about the plaintext (e.g., language, the identity of the speaker, the speaker's gender, etc.), and (ii) perform poorly with a large number of classes. Second, we introduce new algorithms for traffic morphing that are more generally applicable and do not depend on assumptions about the goals of the adversary. Finally, we evaluate our defenses against re-identification attacks, and show, using a large real-world corpus of spoken audio samples, that our techniques reduce the adversary's accuracy by 94\% with low computational and bandwidth overhead.},
documenturl = {papers/muffler.pdf}
}

@article{accountable-wiretapping-journal,
author = {Adam Bates and Kevin Butler and Micah Sherr and Clay Shields and Patrick Traynor and Dan Wallach},
journal = {Journal of Computer Security},
volume = {23},
pages = {167--195},
year = {2015},
title = {{Accountable Wiretapping -or- I Know They Can Hear You Now}},
documenturl = {http://content.iospress.com/articles/journal-of-computer-security/jcs515},
abstract = {In many democratic countries, CALEA wiretaps are
used by law enforcement agencies to perform investigations and
gather evidence for legal procedures.  However, existing CALEA
wiretap implementations are often engineered with the assumption
that wiretap operators are trustworthy and wiretap targets do not
attempt to evade the wiretap.  Although it may be possible to
construct more robust wiretap architectures by reengineering
significant portions of the telecommunications infrastructure, such
efforts are prohibitively costly.  This paper instead proposes a
lightweight {\em accountable wiretapping} system for enabling secure
audits of existing CALEA wiretapping systems.  Our proposed system
maintains a tamper-evident encrypted log over wiretap events,
enforces access controls over wiretap records, and enables
privacy-preserving aggregate queries and compliance checks.  We
demonstrate using campus-wide telephone trace data from a large
university that our approach provides efficient auditing
functionalities while incurring only modest overhead.  Based on
publicly available wiretap reporting statistics, we conservatively
estimate that our architecture can support tamper-evident logging
for all of the United States' ongoing CALEA wiretaps using three
commodity PCs.}
}

@inproceedings{sanity,
author = {Ang Chen and W. Brad Moore and Hanjun Xiao and Andreas
Haeberlen and Linh Thi Xuan Phan and Micah Sherr and
Wenchao Zhou},
title = {{Detecting Covert Timing Channels with Time-Deterministic Replay}},
year = {2014},
month = {October},
booktitle = {USENIX Symposium on Operating Systems Design and Implementation (OSDI)},
documenturl = {papers/tdr.pdf},
abstract = {This paper presents a mechanism called \emph{time-deterministic replay (TDR)}
that can reproduce the execution of a program, \emph{including its precise
timing}. Without TDR, reproducing the timing of
an execution is difficult because there are many sources of timing variability --
such as preemptions, hardware interrupts, cache effects, scheduling decisions, etc.
TDR uses a combination of techniques to either mitigate or eliminate most of
these sources of variability. Using a prototype implementation of TDR in a
Java Virtual Machine, we show that it is possible to reproduce the timing to
within 1.5\% of the original execution, even on commodity hardware.

The paper discusses several potential applications of TDR, and studies
one of them in detail: the detection of a covert timing
channel. Timing channels can be used to exfiltrate information from a
compromised machine; they work by subtly varying the timing of the
machine's outputs, and it is this variation that can be detected with
TDR. Unlike prior solutions, which generally look for a specific type
of timing channel, our approach can detect a wide variety of
channels with high accuracy.}
}

@inproceedings{disruption-mac,
author = {Henry Tan and Chris Wacek and Calvin Newport and Micah Sherr},
booktitle = {International Conference on Principles of Distributed Systems (OPODIS)},
month = {December},
title = {{A Disruption-Resistant MAC Layer for Multichannel Wireless Networks}},
year = {2014},
documenturl = {papers/mac-disruption.pdf},
abstract = {Wireless networking occurs on a shared medium which renders communication vulnerable to disruption from other networks, environmental interference, and even malicious jammers. The standard solution to this problem is to deploy coordinated spread spectrum technologies that require pre-shared secrets between communicating devices. These secrets can be used to coordinate hopping patterns or spreading sequences. In this paper, by contrast, we study the local broadcast and unicast problems in a disrupted multichannel network with no pre-shared secrets between devices. Previous work in this setting focused on the special case of a single pre-designated sender in a single hop network topology.  We consider in this paper, for the first time, upper and lower bounds to these problems in multihop topologies with multiple senders. To validate the potential real world application of our strategies, we conclude by describing a general purpose MAC protocol that uses the algorithms as key primitives, and validates its usefulness with a proof-of-concept implementation that runs the protocol on commodity hardware.}
}

@inproceedings{neverbeenkist,
author = {Rob Jansen and John Geddes and Chris Wacek and Micah Sherr and Paul Syverson},
booktitle = {USENIX Security Symposium (Security)},
title = {{Never Been KIST: Tor's Congestion Management Blossoms with Kernel-Informed Socket Transport}},
month = {August},
year = {2014},
abstract = {Tor's growing popularity and user diversity has
resulted in network performance problems that are
not well understood. A large body of work has
attempted to solve these problems without a complete
understanding of where congestion occurs in Tor. In
this paper, we first study congestion in Tor at
individual relays as well as along the entire
end-to-end Tor path and find that congestion occurs
almost exclusively in egress kernel socket
buffers. We then analyze Tor's socket interactions
and discover two major issues affecting congestion:
Tor writes sockets sequentially, and Tor writes as
much as possible to each socket. We thus design,
implement, and test KIST: a new socket management
algorithm that uses real-time kernel information to
\textit{dynamically compute the amount to write} to
each socket while considering \textit{all writable
circuits} when scheduling new cells. We find that,
in the medians, KIST reduces circuit congestion by
over 30 percent, reduces network latency by 18
percent, and increases network throughput by nearly
10 percent. We analyze the security of KIST and find
an acceptable performance and security trade-off, as
it does not significantly affect the outcome of
well-known latency and throughput attacks.  While
our focus is Tor, our techniques and observations
should help analyze and improve overlay and
application performance, both for security
applications and in general.},
documenturl = {papers/neverbeenkist.pdf}
}

@article{humanets-journal,
author = {Adam Aviv and Micah Sherr and Matt Blaze and Jonathan Smith},
title = {Privacy-Aware Message Exchanges for Humanets},
journal = {Elsevier Journal of Computer Communications},
volume = {48},
pages = {30--43},
month = {July},
year = {2014},
documenturl = {http://www.sciencedirect.com/science/article/pii/S0140366414000991},
abstract = {This paper describes a novel privacy-aware geographic routing protocol for Human Movement Networks (HumaNets). HumaNets are fully decentralized opportunistic store-and-forward, delay-tolerant networks composed of smartphone devices. Such networks allow participants to exchange messages phone-to-phone and have applications where traditional infrastructure is unavailable (e.g., during a disaster) and in totalitarian states where cellular network monitoring and censorship are employed. Our protocol leverages self-determined location profiles of smartphone operators' movements as a predictor of future locations, enabling efficient geographic routing over metropolitan-wide areas. Since these profiles contain sensitive information about participants' prior movements, our routing protocol is designed to minimize the exposure of sensitive information during a message exchange. We demonstrate via simulation over both synthetic and real-world trace data that our protocol is highly scalable, leaks little information, and balances privacy and efficiency: messages are approximately 20\% more likely to be delivered than similar random walk protocols, and the median latency is comparable to epidemic protocols while requiring an order of magnitude fewer messages.}
}

@inproceedings{sdn-eyes,
author = {Bates, Adam and Butler, Kevin and Haeberlen, Andreas and Sherr, Micah and Zhou, Wenchao},
booktitle = {Workshop on Security of Emerging Networking Technologies (SENT)},
month = {February},
title = {{Let SDN Be Your Eyes: Secure Forensics in Data Center Networks}},
year = {2014},
documenturl = {papers/forensics-sent2014.pdf},
abstract = {Discovering the causes of incorrect behavior in large networks is often difficult.  This difficulty is compounded when some machines in the network are compromised, since these compromised machines may use deception or tamper with data to frustrate forensic analysis.  Recently proposed forensic tools enable administrators to learn the causes of some system states in a partially compromised network, but these tools are inherently unable to (1)~observe covert communication between compromised nodes or (2)~detect attempts to exfiltrate sensitive data.

In this paper, we observe that the emergence of Software-Defined Networking (SDN) offers interesting new opportunities for network forensics. We sketch the design of an SDN-based forensic system that can be used to investigate a wide variety of faults in data center networks, including previously unobservable attacks such as data exfiltration and collusion between compromised nodes. Our key insight is that the network itself can be used as a point of observation, which gives us a holistic view of network activity. We show that a collection of lightweight middleboxes would be sufficient to support this functionality, and we discuss several additional challenges and opportunities for SDN-based forensic tools.}
}

@inproceedings{censorship-sideeffect,
author = {Henry Tan and Micah Sherr},
booktitle = {International Workshop on Security Protocols},
title = {{Censorship Resistance as a Side-Effect}},
month = {March},
abstract = {This position paper presents the following thought
experiment: can we build communication protocols
that (1)~are sufficiently useful that they achieve
widespread adoption and (2)~thwart censorship as a
consequence of their design?  We posit that a useful
communication platform that is inherently resistant
to traffic analysis, if widely adopted and {\em used
primarily for purposes not related to censorship
circumvention}, may be too politically and
economically costly for a government to block.},
internal-note = {Source abstract read "achieve mechanisms and (2)"; reconstructed as "achieve widespread adoption" --- verify against the published version.},
documenturl = {papers/censorship-as-sideeffect.pdf},
year = {2014}
}

@inproceedings{fair-mis,
author = {Jeremy Fineman and Calvin Newport and Micah Sherr and Tonghe Wang},
booktitle = {IEEE International Parallel \& Distributed Processing Symposium (IPDPS)},
title = {{Fair Maximal Independent Sets}},
month = {May},
year = {2014},
abstract = {Finding a maximal independent set (MIS) is a
classic problem in graph theory that has been widely
studied in the context of distributed
algorithms. Standard distributed solutions to the
MIS problem focus on time complexity. In this paper,
we also consider {\em fairness}.  For a given MIS
algorithm $\mathcal{A}$ and graph $G$, we define the
{\em inequality factor} for $\mathcal{A}$ on $G$ to be
the largest ratio between the probabilities of the
nodes joining an MIS in the graph.  We say an
algorithm is {\em fair} with respect to a family of
graphs if it achieves a constant inequality factor
for all graphs in the family.  In this paper, we
seek efficient and fair algorithms for common graph
families.  We begin by describing an algorithm that
is fair and runs in $O(\log^{*} n)$-time in rooted
trees of size $n$.  Moving to unrooted trees, we
describe a fair algorithm that runs in $O(\log n)$
time.  Generalizing further to bipartite graphs, we
describe a third fair algorithm that requires
$O(\log^2{n})$ rounds.  We also show a fair
algorithm for planar graphs that runs in
$O(\log^2{n})$ rounds, and describe an algorithm
that can be run in any graph, yielding good bounds
on inequality in regions that can be efficiently
colored with a small number of colors.  We conclude
our theoretical analysis with a lower bound that
identifies a graph where all MIS algorithms achieve
an inequality bound in $\Omega(n)$---eliminating the
possibility of an MIS algorithm that is fair in all
graphs.  Finally, to motivate the need for provable
fairness guarantees, we simulate both our tree
algorithm and Luby's MIS algorithm in a variety of
different tree topologies---some synthetic and some
derived from real world data. Whereas our algorithm
always yields an inequality factor $\leq 3.25$ in
these simulations, Luby's algorithm yields factors
as large as $168$.},
documenturl = {papers/fair-mis.pdf}
}

@article{a3-net-journal,
author = {Micah Sherr and Harjot Gill and Taher Aquil Saeed and Andrew Mao and William R. Marczak and Saravana Soundararajan and Wenchao Zhou and Boon Thau Loo and Matt Blaze},
journal = {Elsevier Computer Networks},
month = {February},
pages = {206--227},
title = {{The Design and Implementation of the A$^3$ Application-Aware Anonymity Platform}},
volume = {58},
year = {2014},
documenturl = {http://www.sciencedirect.com/science/article/pii/S1389128613003599},
abstract = {This paper presents the design and implementation of Application-Aware Anonymity (A$^3$), an extensible platform for rapidly prototyping and evaluating anonymity protocols on the Internet. A$^3$ supports the development of highly tunable anonymous protocols that enable applications to tailor their anonymity properties and performance characteristics according to specific communication requirements.

To support flexible path construction, A$^3$ uses a declarative language to compactly specify path selection and instantiation policies. We demonstrate that our declarative language is sufficiently expressive to encode novel multi-metric performance constraints as well as existing relay selection algorithms employed by Tor and other anonymity systems, using only a few lines of concise code. We experimentally evaluate A$^3$ using a combination of trace-driven simulations and a deployment on PlanetLab, as well as a case-study of A$^3$-enabled voice-over-IP communication. Our experimental results demonstrate that A$^3$ can flexibly and efficiently support a wide range of path selection and instantiation strategies at low performance overhead.}
}

@inproceedings{senser,
author = {Jordan Wilberding and Andrew Yates and Micah Sherr and Wenchao Zhou},
booktitle = {Annual Computer Security Applications Conference (ACSAC)},
title = {{Validating Web Content with Senser}},
month = {December},
year = {2013},
abstract = {This paper introduces Senser, a system for
validating retrieved web content.  Senser does not
rely on a PKI and operates {\em even when SSL/TLS is
not supported by the web server}.  Senser operates
as a network of proxies located at different vantage
points on the Internet.  Clients query a random
subset of Senser proxies for compact descriptions of
a desired web page, and apply consensus and matching
algorithms to the returned results to locally render
a ``majority'' web page.  To ensure diverse
selections of proxies (and consequently decrease an
adversary's ability to manipulate a majority of the
proxies' requests), Senser leverages Internet
mapping systems that accurately predict AS-level
paths between available proxies and the desired web
page.  We demonstrate using a deployment of Senser
on Amazon EC2 that Senser detects and mitigates
attempts by adversaries to manipulate web content
--- even when controlling large collections of
autonomous systems --- while maintaining reasonable
performance.},
internal-note = {Abstract was truncated mid-sentence and the field was unterminated in the source (unbalanced braces); final words and closing brace reconstructed --- verify against the published version.},
documenturl = {papers/senser.pdf}
}

@inproceedings{tor-realistic-adversaries,
author = {Aaron Johnson and Chris Wacek and Rob Jansen and Micah Sherr and Paul Syverson},
title = {{Users Get Routed: Traffic Correlation on Tor By Realistic Adversaries}},
booktitle = {{ACM Conference on Computer and Communications Security (CCS)}},
month = {November},
year = {2013},
documenturl = {papers/users-get-routed.pdf},
abstract = {We present the first analysis of the popular Tor
anonymity network that indicates the security of
typical users against reasonably realistic
adversaries in the Tor network or in the underlying
Internet. Our results show that Tor users are far
more susceptible to compromise than indicated by
prior work.  Specific contributions of the paper
include (1) a model of various typical kinds of
users, (2) an adversary model that includes Tor
network relays, autonomous systems (ASes), Internet
exchange points (IXPs), and groups of IXPs drawn
from empirical study, (3) metrics that indicate how
secure users are over a period of time, (4) the most
accurate topological model to date of ASes and IXPs
as they relate to Tor usage and network
configuration, (5) a novel realistic Tor path
simulator ({\em TorPS}), and (6) analyses of
security making use of all the above. To show that
our approach is useful to explore alternatives and
not just Tor as currently deployed, we also analyze
a published alternative path selection algorithm,
Congestion-Aware Tor. We create an empirical model
of Tor congestion, identify novel attack vectors,
and show that it too is more vulnerable than
previously indicated.}
}

@inproceedings{site-based-inference,
author = {W. Brad Moore and Yifang Wei and Adam Orshefsky and Micah Sherr and Lisa Singh and Hui Yang},
booktitle = {ASE/IEEE International Conference on Information Privacy, Security, Risk and Trust (PASSAT)},
note = {Short paper},
title = {{Understanding Site-Based Inference Potential for Identifying Hidden Attributes}},
month = {September},
documenturl = {papers/understanding-inference.pdf},
abstract = {The popularity of social networking sites has led to
the creation of massive online databases containing (potentially
sensitive) personal information, portions of which are often
publicly accessible. Although most popular social networking sites
allow users to customize the degree to which their information
is publicly exposed, the disclosure of even a small, seemingly
innocuous set of profile attributes may be sufficient to infer
a surprisingly revealing set of attribute-value pairings. This
paper analyzes the predictive accuracy of existing and ensemble
inference algorithms to infer hidden attributes using publicly
exposed attribute-values. For our tested population, we find that
(i) certain attributes are more accurately predicted than others,
(ii) each tested inference algorithm is well-suited for inferring a
particular subset of attributes, and (iii) these subsets of inferable
attributes often have little overlap. Taken collectively, our results
indicate that the amount of information one can extract from a
given user's public profile is often greater than the sum of the
attributes that the user has chosen to publish.},
year = {2013}
}

@inproceedings{pst-vulnerability,
author = {John Ferro and Lisa Singh and Micah Sherr},
title = {{Identifying Individual Vulnerability Based on Public Data}},
booktitle = {International Conference on Privacy, Security and Trust (PST)},
month = {July},
year = {2013},
documenturl = {papers/pst-vulnerability.pdf},
abstract = {Companies and government agencies frequently own
data sets containing personal information about
clients, survey responders, or users of a
product. Sometimes these organizations are required
or wish to release anonymized versions of this
information to the public. Prior to releasing these
data, they use established privacy preservation
methods such as binning, data perturbation, and data
suppression to maintain the anonymity of clients,
customers, or survey participants.  However,
existing work has shown that common privacy
preserving measures fail when anonymized data are
combined with data from online social networks,
social media sites, and data aggregation sites.

This paper introduces a methodology for determining
the {\em vulnerability} of individuals in a
pre-released data set to re-identification using
public data. As part of this methodology, we propose
novel metrics to quantify the amount of information
that can be gained from combining pre-released data
with publicly available online data. We then
investigate how to utilize our metrics to identify
individuals in the data set who may be particularly
vulnerable to this form of data combination. We
demonstrate the effectiveness of our methodology on
a real world data set using public data from both
social networking and data aggregation sites.}
}

@inproceedings{tor-relayselection,
author = {Chris Wacek and Henry Tan and Kevin Bauer and Micah Sherr},
title = {{An Empirical Evaluation of Relay Selection in Tor}},
booktitle = {Network and Distributed System Security Symposium (NDSS)},
month = {February},
year = {2013},
documenturl = {papers/tor-relaystudy.pdf},
abstract = {While Tor is the most popular low-latency anonymity network in use
today, Tor suffers from a variety of performance problems that
continue to serve as a strong disincentive to wide scale
adoption. One reason why Tor is slow is due to the manner in which
clients select Tor relays. There have been a number of recent
proposals for modifying Tor's relay selection algorithm, often to
achieve improved bandwidth, latency, and/or anonymity.  This paper
explores the anonymity and performance trade-offs of the proposed
relay selection techniques using highly accurate topological models
that capture the actual Tor network's autonomous system (AS)
boundaries, points-of-presence, inter-relay latencies, and relay
performance characteristics.

Using realistic network models, we conduct a whole-network
evaluation with varying traffic workloads to understand the
potential performance benefits of a comprehensive set of relay
selection proposals from the Tor literature.  We also quantify the
anonymity properties of each approach using our network model in
combination with simulations fueled by data from the live Tor
network.  Our results indicate that a combination of
bandwidth-weighting and recently proposed virtual coordinate-based
relay selection provides the greatest
performance improvement of the proposals surveyed.}
}

@inproceedings{dtap,
author = {Wenchao Zhou and Suyog Mapara and Yiqing Ren and Yang Li and Andreas Haeberlen and Zachary Ives and Boon Thau Loo and Micah Sherr},
booktitle = {International Conference on Very Large Data Bases (VLDB)},
title = {{Distributed Time-aware Provenance}},
year = {2013},
month = {August},
abstract = {The ability to reason about changes in a
distributed system's state enables network
operators to identify misconfigurations, detect
intrusions, and pinpoint
performance bottlenecks. We propose a novel
provenance model called Distributed Time-aware
Provenance (DTaP) that aids distributed system
forensics and debugging by explicitly representing
time, distributed state, and state changes. Using a
distributed Datalog abstraction for modeling
distributed protocols, we prove that the DTaP model
provides a sound and complete representation that
correctly captures dependencies among events in a
distributed system. We additionally present
DistTape, an implementation of the DTaP model that
uses novel distributed storage structures, query
processing, and cost-based optimization techniques
to efficiently query time-aware provenance in a
distributed setting. Using two example systems
(declarative network routing and Hadoop MapReduce),
we demonstrate that DistTape can efficiently
maintain and query time-aware provenance at low
communication and computation cost.},
internal-note = {Two garbled spans in the source abstract ("enables network misconfigurations" and "events in a DistTape, an implementation") were reconstructed --- verify against the published VLDB version.},
documenturl = {papers/dtap-vldb13.pdf}
}

@inproceedings{collaborative-poster,
author = {Mingchen Zhao and Wenchao Zhou and Alexander J. T. Gurney and Andreas Haeberlen and Micah Sherr and Boon Thau Loo},
title = {{Collaborative Verification with Privacy Guarantees (Poster)}},
booktitle = {USENIX Symposium on Operating Systems Design and Implementation (OSDI)},
month = {October},
year = {2012}
}

@inproceedings{humanets,
author = {Adam J. Aviv and Micah Sherr and Matt Blaze and Jonathan M. Smith},
booktitle = {European Symposium on Research in Computer Security (ESORICS)},
title = {{Privacy-Aware Message Exchanges for Geographically Routed Human Movement Networks}},
year = {2012},
month = {September},
abstract = {  This paper introduces a privacy-aware geographic routing
protocol for {\em Human Movement Networks} (HumaNets). HumaNets are
fully decentralized opportunistic and delay-tolerant
networks composed of smartphone devices. Such networks allow
participants to exchange messages {\em
phone-to-phone} and have applications where traditional
infrastructure is unavailable (e.g., during a disaster) and in
totalitarian states where cellular network monitoring and censorship
are employed.  Our protocol leverages self-determined {\em location
profiles} of smartphone operators' movements as a predictor of
future locations, enabling efficient geographic routing over
metropolitan-wide areas. Since these profiles contain sensitive
information about participants' {\em prior movements}, our routing
protocol is designed to minimize the exposure of sensitive
information during a message exchange. We demonstrate via simulation
over both synthetic and real-world trace data that our protocol is
highly scalable, leaks little information, and balances privacy and
efficiency: messages are 30\% more likely to be delivered than
similar random walk protocols, and the median latency is only
23-28\% greater than epidemic protocols while requiring an order of
magnitude fewer messages.},
documenturl = {papers/aviv-esorics12-privacy.pdf}
}

@inproceedings{safest-redteaming,
title = {{Collaborative Red Teaming for Anonymity System Evaluation}},
booktitle = {USENIX Workshop on Cyber Security Experimentation and Test (CSET)},
author = {Sandy Clark and Chris Wacek and Matt Blaze and Boon Thau Loo and Micah Sherr and Clay Shields and Jonathan Smith},
month = {August},
abstract = {This paper describes our experiences as researchers and developers
during red teaming exercises of the SAFEST anonymity system.  We
argue that properly evaluating an anonymity system --- particularly
one that makes use of topological information and diverse relay
selection strategies, as does SAFEST --- presents unique challenges.
We present our efforts towards meeting these challenges, and discuss
a collaborative evaluation model in which developers play a
supporting role during the evaluation
process.},
internal-note = {Source abstract was missing text between "challenges" and "present our efforts" and before "which developers"; reconstructed --- verify against the published version.},
documenturl = {papers/collaborative.pdf},
year = {2012}
}

@inproceedings{pvr,
author = {Mingchen Zhao and Wenchao Zhou and Alexander Gurney and Andreas Haeberlen and Micah Sherr and Boon Thau Loo},
year = {2012},
month = {August},
title = {{Private and Verifiable Interdomain Routing Decisions}},
booktitle = {Conference on Applications, Technologies, Architectures, and Protocols for Computer Communications (SIGCOMM)},
documenturl = {papers/pvr.pdf},
abstract = {Existing secure interdomain routing protocols can verify
basic properties of routes,
such as whether they correspond to a real network path.
It is often useful to verify more complex properties relating to the
route decision procedure; for example,
whether the chosen route was the best one available,
or whether it was consistent with the network's peering agreements.
However, this is difficult to do without knowing a
network's routing policy and full routing state,
which are not normally disclosed.

In this paper, we show how a network can allow its peers to verify
a number of nontrivial properties of its interdomain routing decisions
\emph{without} revealing any additional information. If all the
properties hold, the peers learn nothing beyond what the interdomain
routing protocol already reveals; if a property does not hold, at
least one peer can detect this and prove the violation. We present
SPIDeR, a practical system that applies this approach to the
Border Gateway Protocol, and we report results from an experimental
evaluation to demonstrate that SPIDeR has a reasonable overhead.},
internal-note = {Source abstract read "can verify such as whether"; missing phrase reconstructed as "basic properties of routes," --- verify against the published SIGCOMM version.}
}

@inproceedings{jackpot,
author = {Henry Tan and Nazli Goharian and Micah Sherr},
title = {{\$100,000 Prize Jackpot. Call Now! Identifying the Pertinent Features of SMS Spam (Poster)}}, year = {2012}, month = {August}, booktitle = {ACM Conference on Research and Development in Information Retrieval (SIGIR)}, abstract = {Mobile SMS spam is on the rise and is a prevalent problem. While recent work has shown that simple machine learning techniques can distinguish between ham and spam with high accuracy, this paper explores the individual contributions of various textual features in the classification process. {\em Our results reveal the surprising finding that simple is better}: using the largest spam corpus of which we are aware, we find that using simple textual features is sufficient to provide accuracy that is nearly identical to that achieved by the best known techniques, while achieving a twofold speedup.} }  @inproceedings{accountable-wiretapping, author = {Adam Bates and Kevin Butler and Micah Sherr and Clay Shields and Patrick Traynor and Dan Wallach}, year = {2012}, title = {{Accountable Wiretapping -or- I Know They Can Hear You Now}}, booktitle = {Network and Distributed System Security Symposium (NDSS)}, documenturl = {papers/accountable-wiretapping.pdf}, month = {February}, abstract = {In many democratic countries, CALEA wiretaps are used by law enforcement agencies to perform investigations and gather evidence for legal procedures. However, existing CALEA wiretap implementations are often engineered with the assumption that wiretap operators are trustworthy and wiretap targets do not attempt to evade the wiretap. Although it may be possible to construct more robust wiretap architectures by reengineering significant portions of the telecommunications infrastructure, such efforts are prohibitively costly. This paper instead proposes a lightweight {\em accountable wiretapping} system for enabling secure audits of existing CALEA wiretapping systems. 
Our proposed system maintains a tamper-evident encrypted log over wiretap events, enforces access controls over wiretap records, and enables privacy-preserving aggregate queries and compliance checks. We demonstrate using campus-wide telephone trace data from a large university that our approach provides efficient auditing functionalities while incurring only modest overhead. Based on publicly available wiretap reporting statistics, we conservatively estimate that our architecture can support tamper-evident logging for all of the United States' ongoing CALEA wiretaps using three commodity PCs.} }  @inproceedings{eating-cake, author = {Alexander J.T. Gurney and Andreas Haeberlen and Wenchao Zhou and Micah Sherr and Boon Thau Loo}, title = {{Having your Cake and Eating it too: Routing Security with Privacy Protections}}, year = {2011}, booktitle = {ACM Workshop on Hot Topics in Networks (HotNets)}, month = {November}, documenturl = {papers/cake.pdf}, abstract = {Internet Service Providers typically do not reveal details of their interdomain routing policies due to security concerns, or for commercial or legal reasons. As a result, it is difficult to hold ISPs accountable for their contractual agreements. Existing solutions can check basic properties, such as whether route announcements correspond to valid routes, but do not verify how these routes were chosen. In essence, today's Internet forces one to choose between per-AS privacy and verifiability. In this paper, we argue that making this difficult tradeoff is unnecessary. We propose \emph{private and verifiable routing} (PVR), a technique that enables ISPs to check whether their neighbors are fulfilling their contractual promises to them, and to obtain evidence of any violations, \emph{without} disclosing information that the routing protocol does not already reveal. As initial evidence that PVR is feasible, we sketch a PVR system that can verify some simple BGP policies. 
We conclude by highlighting several research challenges as future work.} }  @inproceedings{tortoise, author = {Brad Moore and Chris Wacek and Micah Sherr}, title = {{Exploring the Potential Benefits of Expanded Rate Limiting in Tor: Slow and Steady Wins the Race With Tortoise}}, year = {2011}, booktitle = {Annual Computer Security Applications Conference (ACSAC)}, month = {December}, documenturl = {papers/tortoise.pdf}, abstract = {Tor is a volunteer-operated network of application-layer relays that enables users to communicate privately and anonymously. Unfortunately, Tor often exhibits poor performance due to congestion caused by the unbalanced ratio of clients to available relays, as well as a disproportionately high consumption of network capacity by a small fraction of filesharing users. This paper argues the very counterintuitive notion that {\em slowing down traffic on Tor will increase the bandwidth capacity of the network} and consequently improve the experience of interactive web users. We introduce Tortoise, a system for rate limiting Tor at its ingress points. We demonstrate that Tortoise incurs little penalty for interactive web users, while significantly decreasing the throughput for filesharers. Our techniques provide incentives to filesharers to configure their Tor clients to also relay traffic, which in turn improves the network's overall performance. 
We present large-scale emulation results that indicate that interactive users will achieve a significant speedup if even a small fraction of clients opt to run relays.} }  @inproceedings{tracking-adversaries, author = {Wenchao Zhou and Qiong Fei and Arjun Narayan and Andreas Haeberlen and Boon Thau Loo and Micah Sherr}, year = {2011}, booktitle = {ACM Symposium on Operating Systems Principles (SOSP)}, title = {{Secure Network Provenance}}, month = {October}, documenturl = {papers/snp-sosp.pdf}, abstract = {This paper introduces \emph{secure network provenance (SNP)}, a novel technique that enables networked systems to explain to their operators \emph{why} they are in a certain state -- e.g., why a suspicious routing table entry is present on a certain router, or where a given cache entry originated. SNP provides network forensics capabilities by permitting operators to track down faulty or misbehaving nodes, and to assess the damage such nodes may have caused to the rest of the system. SNP is designed for adversarial settings and is robust to manipulation; its tamper-evident properties ensure that operators can detect when compromised nodes lie or falsely implicate correct nodes. We also present the design of {\sc SNooPy}, a general-purpose SNP system. To demonstrate that {\sc SNooPy} is practical, we apply it to three example applications: the Quagga BGP daemon, a declarative implementation of Chord, and Hadoop MapReduce. 
Our results indicate that {\sc SNooPy} can efficiently explain state in an adversarial setting, that it can be applied with minimal effort, and that its costs are low enough to be practical.} }  @inproceedings{experimentor, author = {Kevin Bauer and Micah Sherr and Damon McCoy and Dirk Grunwald}, year = {2011}, title = {{ExperimenTor: A Testbed for Safe and Realistic Tor Experimentation}}, booktitle = {USENIX Workshop on Cyber Security Experimentation and Test (CSET)}, month = {August}, documenturl = {papers/experimentor.pdf}, abstract = {Tor is one of the most widely-used privacy enhancing technologies for achieving online anonymity and resisting censorship. Simultaneously, Tor is also an evolving research network in which investigators perform experiments to improve the network's resilience to attacks and enhance its performance. Existing methods for studying Tor have included analytical modeling, simulations, small-scale network emulations, small-scale PlanetLab deployments, and measurement and analysis of the live Tor network. Despite the growing body of work concerning Tor, there is no widely accepted methodology for conducting Tor research in a manner that preserves realism while protecting live users' privacy. In an effort to propose a standard, rigorous experimental framework for conducting Tor research in a way that ensures safety and realism, we present the design of {\em ExperimenTor}, a large-scale Tor network emulation toolkit and testbed. 
We also report our early experiences with prototype testbeds currently deployed at four research institutions.} }  @inproceedings{selfexplain, author = {Wenchao Zhou and Qiong Fei and Andreas Haeberlen and Boon Thau Loo and Micah Sherr}, title = {{Towards Self-Explaining Networks}}, booktitle = {Future Internet Workshop (FIW)}, year = {2011}, month = {June}, abstract = {In this paper, we argue that networks should be able to explain to their operators why they are in a certain state, even if --– and particularly if --- they have been compromised by an attacker. Such a capability would be useful in forensic investigations, where an operator observes an unexpected state and must decide whether it is benign or an indication that the system has been compromised. Using a very pessimistic threat model in which a malicious adversary can completely compromise an arbitrary subset of the nodes in the network, we argue that we cannot expect to get a complete and correct explanation in all possible cases. However, we also show that, based on recent advances in the systems and the database communities, it seems possible to get a slightly weaker guarantee: for any state change that directly or indirectly affects a correct node, we can either obtain a correct explanation or eventually identify at least one compromised node. 
We discuss the challenges involved in building systems that provide this property, and we report initial results from an early prototype.}, documenturl = {papers/snp-fiw.pdf} }  @inproceedings{nettrails-demo, author = {Wenchao Zhou and Qiong Fei and Sandy Sun and Tao Tao and Andreas Haeberlen and Zachary Ives and Boon Thau Loo and Micah Sherr}, title = {{NetTrails: A Declarative Platform for Provenance Maintenance and Querying in Distributed Systems (Demo)}}, booktitle = {ACM SIGMOD International Conference on Management of Data (SIGMOD)}, month = {June}, year = {2011} }  @inproceedings{prov-without-trust, title = {{Secure Forensics without Trusted Components (Poster)}}, author = {Wenchao Zhou and Qiong Fei and Arjun Narayan and Andreas Haeberlen and Boon Thau Loo and Micah Sherr}, booktitle = {USENIX Symposium on Networked Systems Design and Implementation (NSDI)}, month = {March}, year = {2011} }  @inproceedings{data-centric, title = {{Towards a Data-centric View of Cloud Security}}, author = {Wenchao Zhou and Micah Sherr and William R. Marczak and Zhuoyao Zhang and Tao Tao and Boon Thau Loo and Insup Lee}, year = {2010}, month = {October}, booktitle = {International Workshop on Cloud Data Management (CloudDB)}, abstract = {Cloud security issues have recently gained traction in the research community, with much of the focus primarily concentrated on securing the operating systems and virtual machines on which the services are deployed. In this paper, we take an alternative perspective and propose a {\em data-centric} view of cloud security. In particular, we explore the security properties of secure data sharing {\em between} applications hosted in the cloud. 
We discuss data management challenges in the areas of secure distributed query processing, system analysis and forensics, and query correctness assurance, and describe our current efforts towards meeting these challenges using our {\em Declarative Secure Distributed Systems} (DS2) platform.}, documenturl = {papers/ds2cloud.pdf} }  @inproceedings{evading, title = {{Evading Cellular Data Monitoring with Human Movement Networks}}, author = {Adam J. Aviv and Micah Sherr and Matt Blaze and Jonathan M. Smith}, year = {2010}, month = {August}, booktitle = {USENIX Workshop on Hot Topics in Security (HotSec)}, note = {Errata available at \url{http://www.cs.georgetown.edu/~msherr/papers/mobibot-errata.pdf}}, abstract = {Cellular networks are centrally administered, enabling service providers and their governments to conduct system-wide monitoring and censorship of mobile communication. This paper presents HumaNets, a fully decentralized, smartphone-to-smartphone (and hence human-to-human) message passing scheme that permits unmonitored message communication even when all cellular traffic is inspected. HumaNets message routing protocols exploit human mobility patterns to significantly increase communication efficiency while limiting the exposure of messages to mobile service providers. 
Initial results from trace-driven simulation show that 85\% of messages reach their intended destinations while using orders of magnitude less network capacity than naive epidemic flooding techniques.}, documenturl = {papers/mobibot.pdf} }  @inproceedings{netprov, title = {{Efficient Querying and Maintenance of Network Provenance at Internet-Scale}}, author = {Wenchao Zhou and Micah Sherr and Tao Tao and Xiaozhou Li and Boon Thau Loo and Yun Mao}, year = {2010}, booktitle = {ACM SIGMOD International Conference on Management of Data (SIGMOD)}, month = {June}, documenturl = {papers/efficient-prov.pdf}, abstract = {Network accountability, forensic analysis, and failure diagnosis are becoming increasingly important for network management and security. Such capabilities often utilize {\em network provenance} -- the ability to issue queries over network meta-data. For example, network provenance may be used to trace the path a message traverses on the network as well as to determine how message data was derived and which parties were involved in its derivation. This paper presents the design and implementation of ExSPAN, a {\em generic and extensible} framework to achieve efficient network provenance in a distributed environment. We utilize the database notion of {\em data provenance} to explain'' the existence of {\em any} network state, providing a versatile mechanism for network provenance. To achieve such flexibility at Internet-scale, ExSPAN uses declarative networking in which network protocols can be modeled as continuous queries over distributed streams and specified concisely in a declarative query language. We extend existing data models for provenance developed in database literature to enable distribution at Internet-scale, and investigate numerous optimization techniques to maintain and query distributed network provenance efficiently. The ExSPAN prototype is developed using RapidNet, a declarative networking platform based on the emerging ns-3 toolkit. 
Our experiments over a simulated network and an actual deployment in a testbed environment demonstrate that our system can support a wide range of distributed provenance computations efficiently, resulting in significant reductions in bandwidth costs compared to traditional approaches.} }  @inproceedings{secureblox, title = {{SecureBlox: Customizable Secure Distributed Data Processing}}, author = {William Marczak and Shan Shan Huang and Martin Bravenboer and Micah Sherr and Boon Thau Loo and Molham Aref}, year = {2010}, booktitle = {ACM SIGMOD International Conference on Management of Data (SIGMOD)}, month = {June}, documenturl = {papers/secureblox.pdf}, abstract = {We present {\em SecureBlox}, a declarative system that unifies a distributed query processor with a security policy framework. In SecureBlox, programmers compose existing mechanisms to compactly specify and reconfigure security policies. Our implementation of SecureBlox is a series of extensions to {\em LogicBlox}, an emerging commercial Datalog-based platform for enterprise software systems, with enhancements to enable distribution, \emph{integrity constraints} and static meta-programmability. SecureBlox allows meta-programmability via {\em BloxGenerics}---a language extension for compile-time code generation based on the security requirements and trust policies of the deployed environment. We present and evaluated detailed use-cases where SecureBlox enables applications such as an authenticated declarative routing protocol with encrypted advertisements and an authenticated and encrypted parallel hash join operation. Our results demonstrate SecureBlox's ability to specify and implement a wide range of different security constructs for distributed systems, and enable tradeoffs between performance and security.} }  @inproceedings{a3-ndss, title = {{A3: An Extensible Platform for Application-Aware Anonymity}}, author = {Micah Sherr and Andrew Mao and William R. 
Marczak and Wenchao Zhou and Boon Thau Loo and Matt Blaze}, year = {2010}, booktitle = {17th Annual Network and Distributed System Security Symposium (NDSS)}, month = {February}, abstract = {This paper presents the design and implementation of {\em Application-Aware Anonymity} (A3), an extensible platform for applications to deploy anonymity-based services on the Internet. A3 allows applications to tailor their anonymity properties and performance characteristics according to their specific communication requirements. For example, A3 permits an anonymous voice-over-IP application to produce anonymous paths with low latency and jitter, while providing anonymous file transfer applications with high bandwidth (but not necessarily low latency or jitter) routes. To support flexible path construction, A3 exposes a declarative language (A3Log) that enables applications to compactly specify path selection and instantiation policies which are then executed using a declarative networking engine. We demonstrate that our declarative language is sufficiently versatile to represent novel multi-metric performance constraints as well as existing relay selection algorithms used by Tor and other anonymity systems, using only a few lines of concise code. In addition to specifying relay selection strategies, senders are able to use our declarative techniques to construct anonymous tunnels according to their specifications (for example, via Onion Routing or Crowds). We experimentally evaluate the A3 system using a combination of trace-driven simulations and deployment on PlanetLab. 
Our experimental results demonstrate that the A3 system can flexibly support a wide range of path selection and instantiation strategies at low performance overhead.}, documenturl = {papers/a3-ndss.pdf} }  @inproceedings{app-containers, title = {{Application Containers without Virtual Machines}}, month = {November}, year = {2009}, author = {Micah Sherr and Matt Blaze}, booktitle = {{2nd ACM Workshop on Virtual Machine Security (VMSec)}}, abstract = {This position paper introduces lightweight {\em cryptographic jails} (CryptoJails) that protect the privacy of application data by intercepting write accesses and redirecting them to encrypted application containers. CryptoJails ensure that application data (for example, cached emails or web pages) cannot be read or undetectably altered by other applications. Unlike existing approaches, CryptoJails do not require kernel modifications or even superuser (i.e., root) privileges, do not impose significant performance overhead, and may even be used with already installed applications.}, documenturl = {papers/app-containers.pdf} }  @inproceedings{calea, title = {{Can They Hear me Now? A Security Analysis of Law Enforcement Wiretaps}}, author = {Micah Sherr and Gaurav Shah and Eric Cronin and Sandy Clark and Matt Blaze}, booktitle = {{ACM Conference on Computer and Communications Security (CCS)}}, month = {November}, year = {2009}, abstract = {Although modern communications services are susceptible to third-party eavesdropping via a wide range of possible techniques, law enforcement agencies in the US and other countries generally use one of two technologies when they conduct legally-authorized interception of telephones and other communications traffic. The most common of these, designed to comply with the 1994 {\em Communications Assistance for Law Enforcement Act (CALEA)}, use a standard interface provided in network switches. This paper analyzes the security properties of these interfaces. 
We demonstrate that the standard CALEA interfaces are vulnerable to a range of unilateral attacks by the intercept target. In particular, because of poor design choices in the interception architecture and protocols, our experiments show it is practical for a CALEA-tapped target to overwhelm the link to law enforcement with spurious signaling messages without degrading her own traffic, effectively preventing call records as well as content from being monitored or recorded. We also identify stop-gap mitigation strategies that partially mitigate some of our identified attacks.}, documenturl = {papers/calea.pdf} }  @inproceedings{link-routing, title = {{Scalable Link-Based Relay Selection for Anonymous Routing}}, author = {Micah Sherr and Matt Blaze and Boon Thau Loo}, booktitle = {{Privacy Enhancing Technologies Symposium (PETS)}}, year = 2009, month = {August}, documenturl = {papers/pets09.pdf}, abstract = {The performance of an anonymous path can be described using many network metrics -- e.g., bandwidth, latency, jitter, loss, etc. However, existing relay selection algorithms have focused exclusively on producing paths with high bandwidth. In contrast to traditional {\em node-based} path techniques in which relay selection is biased by relays' node-characteristics (i.e., bandwidth), this paper presents the case for {\em link-based} path generation in which relay selection is weighted in favor of the highest performing links. Link-based relay selection supports more flexible routing, enabling anonymous paths with low latency, jitter, and loss, in addition to high bandwidth. Link-based approaches are also more secure than node-based techniques, eliminating hotspots'' in the network that attract a disproportionate amount of traffic. For example, misbehaving relays cannot advertise themselves as low-latency'' nodes to attract traffic, since latency has meaning only when measured between two endpoints. 
We argue that link-based path selection is practical for certain anonymity networks, and describe mechanisms for efficiently storing and disseminating link information.} }  @inproceedings{veracity-usenix, title = {{Veracity: Practical Secure Network Coordinates via Vote-based Agreements}}, author = {Micah Sherr and Matt Blaze and Boon Thau Loo}, booktitle = {{USENIX Annual Technical Conference (USENIX ATC)}}, documenturl = {papers/veracity-usenix-atc09.pdf}, year = {2009}, month = {June}, abstract = {Decentralized network coordinate systems have been proposed as a means of efficiently estimating network distances among end-hosts over the Internet without having to contact them directly. These systems support a wide range of network services, including proximity-based routing, neighbor selection in overlays, network-aware overlays, and replica placement in content-distribution networks. In this paper, we describe {\em Veracity}, a practical fully-decentralized service for securing network coordinate systems. In Veracity, all advertised coordinates and subsequent coordinate updates must be independently verified by a small set of nodes via a voting scheme. Unlike existing approaches, Veracity does not require any {\em a priori} secrets or trusted parties, and does not depend on outlier analysis of coordinates based on a fixed set of neighbors. We have implemented Veracity by modifying an open-source network coordinate system, and have demonstrated within a simulated network environment and deployment on PlanetLab that Veracity mitigates attacks for moderate sizes of malicious nodes (up to 30\% of the network), even when coalitions of attackers coordinate their attacks. 
We further show that Veracity is resistant to high levels of churn and incurs only a modest communication overhead.} }  @inproceedings{ess-evt08, title = {{Security Evaluation of the ES\&S Voting Machines and Election Management System}}, author = {Aviv, Adam and Cern\'y, Pavol and Clark, Sandy and Cronin, Eric and Shah, Gaurav and Sherr, Micah and Blaze, Matt}, booktitle = {Third USENIX/ACCURATE Electronic Voting Technology Workshop (EVT)}, year = {2008}, month = {August}, documenturl = {papers/aviv-evt08.pdf}, abstract = {In response to growing concerns about the security and reliability of electronic voting systems, Ohio Secretary of State Jennifer Brunner initiated the Evaluation \& Validation of Election-Related Equipment, Standards and Testing (EVEREST)'' study in October 2007. EVEREST was the first major study of ES\&S voting systems and only the second comprehensive study that examined all components -- from backend registration systems to frontend ballot casting -- of an electronic voting system. In this paper, we describe our experiences as security auditors of the ES\&S voting system for the Ohio EVEREST study. We identify numerous critical vulnerabilities in nearly every component of the ES\&S system that enable attacks that could alter or forge precinct results, install corrupt firmware, and erase audit records. 
In particular, we highlight the architectural issues of the ES\&S voting system and show how the {\em interaction} of the various software and hardware modules leads to systemic vulnerabilities that do not appear to be easily countered with election procedures or software updates.} }  @inproceedings{confusion:wsp05, author = {Eric Cronin and Micah Sherr and Matt Blaze}, title = {{Listen Too Closely and You May Be Confused}}, booktitle = {Security Protocols Workshop}, month = {April}, year = {2005}, abstract = {Among the most basic simplifying assumptions of modern communications security is the notion that most communication channels should, by their very nature, be considered vulnerable to interception. It has long been considered almost reckless to suggest depending on any supposed intrinsic security properties of the network itself, and especially foolish in complex, decentralized, heterogeneously-controlled networks such as the modern Internet. Orthodox doctrine is that any security must be either end-to-end (as with cryptography), or not considered to exist at all. While this heuristic well serves cautious confidential communicators, it is unsatisfying from the point of view of the eavesdropper. Paradoxically, while end-to-end security may be a prerequisite to robust confidentiality in most networks, it does not follow that a lack of end-to-end security always makes it possible to eavesdrop.}, documenturl = {papers/confusion-listen.pdf} }  @inproceedings{isotropism-measurable, title = {{Measurable Security Through Isotropic Channels}}, author = {Sherr, Micah and Cronin, Eric and Blaze, Matt}, booktitle = {Fifteenth International Workshop on Security Protocols}, year = 2007, month = {May}, abstract = {This position paper proposes the use of special broadcast networks to achieve provable and measurable confidentiality of messages. 
We call these networks {\em isotropic channels}, broadcast channels in which receivers cannot reliably determine whether a given message originated from any particular sender and senders cannot prevent a message from reaching any particular receiver. As long as eavesdroppers cannot reliably (i.e., with probabilistic certainty) identify the sender of a message, honest parties can efficiently exchange messages with confidentiality that asymptotically approaches and in some cases reaches perfect secrecy. Even under incorrect assumptions regarding the degree of isotropism offered by a particular channel, a high measure of confidentiality can be efficiently achieved. This position paper makes the case that isotropic channels already exist, and are, in fact, often used in practice. By leveraging isotropic techniques, measurable information theoretic security can be practically achieved.}, documenturl = {papers/isotropism-spw.pdf} }  @article{seer, title = {{Metadata Tables to Enable Dynamic Data Modeling and Web Interface Design}}, author = {Weiner, Mark and Sherr, Micah and Cohen, Abigail}, pages = {51-58}, year = 2002, journal = {International Journal of Medical Informatics}, month = {April}, number = {1}, volume = 65, abstract = {A wealth of information addressing health status, outcomes and resource utilization is compiled and made available by various government agencies. While exploration of the data is possible using existing tools, in general, would-be users of the resources must acquire CD-ROMs or download data from the web, and upload the data into their own database. Where web interfaces exist, they are highly structured, limiting the kinds of queries that can be executed. This work develops a web-based database interface engine whose content and structure is generated through interaction with a metadata table. 
The result is a dynamically generated web interface that can easily accommodate changes in the underlying data model by altering the metadata table, rather than requiring changes to the interface code. This paper discusses the background and implementation of the metadata table and web-based front end and provides examples of its use with the Surveillance, Epidemiology and End-Results (SEER) database.},
documenturl = {http://www.ncbi.nlm.nih.gov/pubmed/11904248}
}

@inproceedings{sherr:npsec05,
title = {{Mitigating DoS Attack Through Selective Bin Verification}},
author = {Sherr, Micah and Greenwald, Michael and Gunter, Carl A. and Khanna, Sanjeev and Venkatesh, Santosh S.},
booktitle = {Workshop on Secure Network Protocols (NPSec)},
year = 2005,
month = {November},
filename = {npsec.pdf},
documenturl = {papers/binning.pdf},
abstract = {Despite considerable attention from both the academic and commercial communities, denial-of-service (DoS) attacks represent a growing threat to network administrators and service providers. A large number of proposed DoS countermeasures attempt to detect an attack in-progress and filter out the DoS attack packets. These techniques often depend on the instantiation of sophisticated routing mechanisms and the ability to differentiate between normal and malicious messages. Unfortunately, neither of these prerequisites may be practical or possible. We propose and evaluate a defense against DoS attacks which we call selective bin verification. The technique shows promise against large DoS attacks, even when attack packets are able to permeate the network and reach the target of their attack. We explore the effectiveness of our technique by implementing an experimental testbed in which selective bin verification is successfully used to protect against DoS attacks. We formally describe the mathematical properties of our approach and parameters for defending against various attacks.}
}

@inproceedings{confusion:ifip,
title = {{On the Reliability of Current Generation Network Eavesdropping Tools}},
author = {Cronin, Eric and Sherr, Micah and Blaze, Matt},
booktitle = {Second Annual IFIP WG 11.9 International Conference on Digital Forensics},
year = 2006,
month = {January},
location = {Orlando, Florida},
documenturl = {papers/eavesdropping-ifip.pdf},
abstract = {This paper analyzes the problem of interception of Internet traffic from the eavesdropper's point of view. We focus on highly favorable conditions for the eavesdropper in which the communicating parties do not cooperate to obscure their traffic (e.g., messages are sent using the standard protocols without the use of cryptography or steganography). We show that this seemingly simple eavesdropping problem is harder than previously thought, and that simple -- and entirely unilateral -- countermeasures are sufficient to prevent accurate traffic capture in many Internet interception configurations, including those employed by every available eavesdropping system we tested. Central to our approach is a new class of techniques that we call {\em confusion}, which, unlike cryptography or steganography, does not require cooperation by the communicating parties and, in some case, can be employed entirely by a third party not involved in the communication at all. We show the viability of these threats with a practical and effective eavesdropping-countermeasures toolkit.}
}

@article{confusion:ijsn,
title = {{On the (un)Reliability of Eavesdropping}},
author = {Cronin, Eric and Sherr, Micah and Blaze, Matt},
year = 2008,
month = {February},
journal = {International Journal of Security and Networks (IJSN)},
volume = {3},
number = {2},
documenturl = {papers/eavesdropping-ijsn.pdf},
abstract = {We investigate the reliability of current generation eavesdropping tools and show that obtaining 'high fidelity' transcripts is harder than previously assumed. Even in situations highly favourable to the eavesdropper, simple unilateral countermeasures are shown to be sufficient to prevent all tested systems from reliably reconstructing communicated messages. Less than a third of the tested systems report irregularities, and 45\% incorrectly interpret covertext chosen by the sending party. Unlike cryptography or steganography, the techniques introduced require no cooperation by the communicating parties and, in some case, can be employed entirely by a third party not involved in the communication at all.}
}

@inproceedings{sensor-interesting,
title = {{Sensor Network Security: More Interesting Than You Think}},
author = {Anand, Madhukar and Cronin, Eric and Sherr, Micah and Blaze, Matt and Ives, Zachary and Lee, Insup},
booktitle = {USENIX Workshop on Hot Topics in Security (HotSec)},
year = 2006,
month = {August},
abstract = {With the advent of low-power wireless sensor networks, a wealth of new applications at the interface of the real and digital worlds is emerging. A distributed computing platform that can measure properties of the real world, formulate intelligent inferences, and instrument responses, requires strong foundations in distributed computing, artificial intelligence, databases, control theory, and security. Before these intelligent systems can be deployed in critical infrastructures such as emergency rooms and powerplants, the security properties of sensors must be fully understood. Existing wisdom has been to apply the traditional security models and techniques to sensor networks. However, sensor networks are not traditional computing devices, and as a result, existing security models and methods are ill suited. In this position paper, we take the first steps towards producing a comprehensive security model that is tailored for sensor networks. Incorporating work from Internet security, ubiquitous computing, and distributed systems, we outline security properties that must be considered when designing a secure sensor network. We propose challenges for sensor networks --- security obstacles that, when overcome, will move us closer to decreasing the divide between computers and the physical world.},
documenturl = {papers/sensor-interesting.pdf}
}

@article{wiretapping,
title = {{Signaling Vulnerabilities in Wiretapping Systems}},
author = {Sherr, Micah and Cronin, Eric and Clark, Sandy and Blaze, Matt},
journal = {IEEE Security \& Privacy Magazine},
pages = {13--25},
year = 2005,
month = {November},
volume = 3,
number = {6},
abstract = {Telephone wiretap and dialed number recording systems are used by law enforcement and national security agencies to collect investigative intelligence and legal evidence. In this paper, we show that many of these systems are vulnerable to simple, unilateral countermeasures that allow wiretap targets to prevent their call audio from being recorded and/or cause false or inaccurate dialed digits and call activity to be logged. The countermeasures exploit the unprotected in-band signals passed between the telephone network and the collection system and are effective against many of the wiretapping technologies currently used by US law enforcement, including at least systems. Possible remedies and workarounds are proposed, and the broader implications of the security properties of these systems are discussed.},
documenturl = {papers/wiretap.pdf}
}

@inproceedings{a3,
title = {{Towards Application-Aware Anonymous Routing}},
author = {Sherr, Micah and Loo, Boon Thau and Blaze, Matt},
booktitle = {Second USENIX Workshop on Hot Topics in Security (HotSec)},
year = 2007,
month = {August},
abstract = {This paper investigates the problem of designing anonymity networks that meet application-specific performance and security constraints. We argue that existing anonymity networks take a narrow view of performance by considering only the strength of the offered anonymity. However, real-world applications impose a myriad of communication requirements, including end-to-end bandwidth and latency, trustworthiness of intermediary routers, and network jitter. We pose a grand challenge for anonymity: the development of a network architecture that enables applications to customize routes that tradeoff between anonymity and performance. Towards this challenge, we present the {\em Application-Aware Anonymity (A$^3$)} routing service. We envision that A$^3$ will serve as a powerful and flexible anonymous communications layer that will spur the future development of anonymity services.},
documenturl = {papers/a3-hotsec.pdf}
}

@inproceedings{veracity,
author = {Sherr, Micah and Loo, Boon Thau and Blaze, Matt},
booktitle = {7th International Workshop on Peer-to-Peer Systems (IPTPS)},
month = {February},
documenturl = {papers/veracity.pdf},
title = {{Veracity: A Fully Decentralized Service for Securing Network Coordinate Systems}},
year = 2008,
abstract = {Decentralized logical coordinate systems have been proposed as a means of
estimating network distances. These systems have widespread usage in p2p
networks, ranging from neighbor selection to replica placement.
Unfortunately, these systems are vulnerable to even a small number of
malicious nodes lying about their coordinates or measurements. In this
paper, we introduce {\em Veracity}, a {\em fully} decentralized service
for securing network coordinate systems. Unlike prior proposals, Veracity
requires neither the presence of a large number of {\em a priori} trusted
nodes nor the use of network triangle inequality testing. Veracity
utilizes a vote-based approach, where all advertised coordinates are
independently verified by a minimal set of nodes before being used. Via
detailed simulations in p2psim, we demonstrate that Veracity mitigates a
variety of known attacks against Vivaldi for moderate sizes of malicious
nodes, incurring acceptable communication overhead, and in some cases,
even reducing the convergence time of the coordinate system.}
}