sravi_pubs.bib

@inproceedings{smartreply2016,
  author = {Anjuli Kannan and Karol Kurach and Sujith Ravi and Tobias Kaufmann and Andrew Tomkins and Balint Miklos and Greg Corrado and Laszlo Lukacs and Marina Ganea and Peter Young and Vivek Ramavajjala},
  title = {{Smart Reply}: Automated Response Suggestion for Email},
  booktitle = {Proceedings of the ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
  url = {http://arxiv.org/pdf/1606.04870.pdf},
  year = {2016},
  abstract = {In this paper we propose and investigate a novel end-to-end method for automatically generating short email responses, called Smart Reply. It generates semantically diverse suggestions that can be used as complete email responses with just one tap on mobile. The system is currently used in Inbox by Gmail and is responsible for assisting with 10\% of all mobile responses. It is designed to work at very high throughput and process hundreds of millions of messages daily. The system exploits state-of-the-art, large-scale deep learning.

  We describe the architecture of the system as well as the challenges that we faced while building it, like response diversity and scalability. We also introduce a new method for semantic clustering of user-generated content that requires only a modest amount of explicitly labeled data.}
}
@inproceedings{trailer2016,
  author    = {Oosterhuis, Harrie and Ravi, Sujith and Bendersky, Michael},
  title     = {Semantic Video Trailers},
  booktitle = {ICML 2016 Workshop on Multi-View Representation Learning (MVRL)},
  year      = {2016},
  url       = {http://arxiv.org/pdf/1609.01819.pdf},
  abstract  = {Query-based video summarization is the task of creating a brief visual trailer, which captures the parts of the video (or a collection of videos) that are most relevant to the user-issued query. In this paper, we propose an unsupervised label propagation approach for this task. Our approach effectively captures the multimodal semantics of queries and videos using state-of-the-art deep neural networks and creates a summary that is both semantically coherent and visually attractive. We describe the theoretical framework of our graph-based approach and empirically evaluate its effectiveness in creating relevant and attractive trailers. Finally, we showcase example video trailers generated by our system.}
}
@inproceedings{debates2016,
  author = {Justine Zhang and Ravi Kumar and Sujith Ravi and Cristian Danescu-Niculescu-Mizil},
  title = {Conversational flow in {Oxford}-style debates},
  booktitle = {Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics - Human Language Technologies (NAACL/HLT)},
  url = {http://tisjune.github.io/papers/paper-iq2.pdf},
  year = {2016},
  abstract = {Public debates are a common platform for presenting and juxtaposing diverging views on important issues. In this work we propose a methodology for tracking how ideas flow between participants throughout a debate. We use this approach in a case study of Oxford-style debates -- a competitive format where the winner is determined by audience votes -- and show how the outcome of a debate depends on aspects of conversational flow. In particular, we find that winners tend to make better use of a debate's interactive component than losers, by actively pursuing their opponents' points rather than promoting their own ideas over the course of the conversation.}
}
@inproceedings{expander2016,
  author = {Sujith Ravi and Qiming Diao},
  title = {Large Scale Distributed Semi-Supervised Learning Using Streaming Approximation},
  booktitle = {Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS)},
  url = {http://jmlr.org/proceedings/papers/v51/ravi16.pdf},
  year = {2016},
  abstract = {Traditional graph-based semi-supervised learning (SSL) approaches are not suited for massive data and large label scenarios since they scale linearly with the number of edges |E| and distinct labels m. To deal with the large label size problem, recent works propose sketch-based methods to approximate the label distribution per node thereby achieving a space reduction from O(m) to O(log m), under certain conditions. In this paper, we present a novel streaming graph-based SSL approximation that effectively captures the sparsity of the label distribution and further reduces the space complexity per node to O(1). We also provide a distributed version of the algorithm that scales well to large data sizes. Experiments on real-world datasets demonstrate that the new method achieves better performance than existing state-of-the-art algorithms with significant reduction in memory footprint. Finally, we propose a robust graph augmentation strategy using unsupervised deep learning architectures that yields further significant quality gains for SSL in natural language applications.}
}
@inproceedings{hierlp2016,
  author    = {Wendt, James B. and Bendersky, Michael and Garcia-Pueyo, Lluis and Josifovski, Vanja and Miklos, Balint and Krka, Ivo and Saikia, Amitabh and Yang, Jie and Cartright, Marc-Allen and Ravi, Sujith},
  title     = {Hierarchical Label Propagation and Discovery for Machine Generated Email},
  booktitle = {Proceedings of the International Conference on Web Search and Data Mining (WSDM)},
  year      = {2016},
  url       = {http://research.google.com/pubs/archive/44293.pdf},
  abstract  = {Machine-generated documents such as email or dynamic webpages are single instantiations of a pre-defined structural template. As such, they can be viewed as a hierarchy of template and document specific content. This hierarchical template representation has several important advantages for document clustering and classification. First, templates capture common topics among the documents, while filtering out the potentially noisy variabilities such as personal information. Second, template representations scale far better than document representations since a single template captures numerous documents. Finally, since templates group together structurally similar documents, they can propagate properties between all the documents that match the template. In this paper, we use these advantages for document classification by formulating an efficient and effective hierarchical label propagation and discovery algorithm. The labels are propagated first over a template graph (constructed based on either term-based or topic-based similarities), and then to the matching documents. We evaluate the performance of the proposed algorithm using a large donated email corpus and show that the resulting template graph is significantly more compact than the corresponding document graph and the hierarchical label propagation is both efficient and effective in increasing the coverage of the baseline document classification algorithm. We demonstrate that the template label propagation achieves more than 91\% precision and 93\% recall, while increasing the label coverage by more than 11\%.}
}
@inproceedings{optproto2015,
  author = {Song Feng and Sujith Ravi and Ravi Kumar and Polina Kuznetsova and Wei Liu and Alex Berg and Tamara Berg and Yejin Choi},
  title = {Refer-to-as Relations as Semantic Knowledge},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  url = {http://www.sravi.org/pubs/optproto-aaai2015.pdf},
  year = {2015},
  abstract = {We study Refer-to-as relations as a new type of semantic knowledge. Compared to the much studied Is-a relation, which concerns factual taxonomic knowledge, Refer-to-as relations aim to address pragmatic semantic knowledge. For example, a ``penguin'' is a ``bird'' from a taxonomic point of view, but people rarely refer to a ``penguin'' as a ``bird'' in vernacular use. This observation closely relates to the entry-level categorization studied in Psychology. We posit that Refer-to-as relations can be learned from data, and that both textual and visual information would be helpful in inferring the relations. By integrating existing lexical structure knowledge with language statistics and visual similarities, we formulate a collective inference approach to map all object names in an encyclopedia to commonly used names for each object. Our contributions include a new labeled data set, the collective inference and optimization approach, and the computed mappings and similarities.}
}
@inproceedings{fastlda2014,
  author = {Aaron Li and Amr Ahmed and Sujith Ravi and Alex Smola},
  title = {Reducing the Sampling Complexity of Topic Models},
  booktitle = {Proceedings of the ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
  url = {http://www.sravi.org/pubs/fastlda-kdd2014.pdf},
  year = {2014},
  abstract = {Inference in topic models typically involves a sampling step to associate latent variables with observations. Unfortunately the generative model loses sparsity as the amount of data increases, requiring O(k) operations per word for k topics. In this paper we propose an algorithm which scales linearly with the number of actually instantiated topics $k_d$ in the document. For large document collections and in structured hierarchical models $k_d \ll k$. This yields an order of magnitude speedup. Our method applies to a wide variety of statistical models such as PDP [16, 4] and HDP [19].

At its core is the idea that dense, slowly changing distributions can be approximated efficiently by the combination of a Metropolis-Hastings step, use of sparsity, and amortized constant time sampling via Walker's alias method.}
}
@inproceedings{soq2014,
  author = {Sujith Ravi and Bo Pang and Vibhor Rastogi and Ravi Kumar},
  title = {Great {Question}! {Question} Quality in {Community} {Q\&A}},
  booktitle = {Proceedings of the International AAAI Conference on Weblogs and Social Media (ICWSM)},
  url = {http://www.sravi.org/pubs/soq-icwsm2014.pdf},
  year = {2014},
  abstract = {Asking the right question in the right way is an art (and a science). In a community question-answering setting, a good question is not just one that is found to be useful by other people---a question is good if it is also presented clearly and shows prior research. Using a community question-answering site that allows voting over the questions, we show that there is a notion of question quality that goes beyond mere popularity. We present techniques using latent topical models to automatically predict the quality of questions based on their content. Our best system achieves a prediction accuracy of 72\%, beating out strong baselines by a significant amount. We also examine the effect of question quality on the dynamics of user behavior and the longevity of questions.}
}
@article{mlctag2014,
  author = {Sujith Ravi and Sergei Vassilvitskii and Vibhor Rastogi},
  title = {Parallel Algorithms for Unsupervised Tagging},
  journal = {Transactions of the Association for Computational Linguistics (TACL)},
  volume = {2},
  url = {http://www.transacl.org/wp-content/uploads/2014/04/44.pdf},
  year = {2014},
  abstract = {We propose a new method for unsupervised tagging that finds minimal models which are then further improved by Expectation Maximization training. In contrast to previous approaches that rely on manually specified and multi-step heuristics for model minimization, our approach is a simple greedy approximation algorithm DMLC (DISTRIBUTED-MINIMUM-LABEL-COVER) that solves this objective in a single step.

We extend the method and show how to efficiently parallelize the algorithm on modern parallel computing platforms while preserving approximation guarantees. The new method easily scales to large data and grammar sizes, overcoming the memory bottleneck in previous approaches. We demonstrate the power of the new algorithm by evaluating on various sequence labeling tasks: Part-of-Speech tagging for multiple languages (including low-resource languages), with complete and incomplete dictionaries, and supertagging, a complex sequence labeling task, where the grammar size alone can grow to millions of entries. Our results show that for all of these settings, our method achieves state-of-the-art scalable performance that yields high quality tagging outputs.}
}
@inproceedings{summdispersion2013,
  author = {Anirban Dasgupta and Ravi Kumar and Sujith Ravi},
  title = {Summarization through Submodularity and Dispersion},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (ACL)},
  url = {http://aclweb.org/anthology/P/P13/P13-1100.pdf},
  year = {2013},
  abstract = {We propose a new optimization framework for summarization by generalizing the submodular framework of (Lin and Bilmes, 2011). In our framework the summarization desideratum is expressed as a sum of a submodular function and a nonsubmodular function, which we call dispersion; the latter uses inter-sentence dissimilarities in different ways in order to ensure non-redundancy of the summary. We consider three natural dispersion functions and show that a greedy algorithm can obtain an approximately optimal summary in all three cases. We conduct experiments on two corpora -- DUC 2004 and user comments on news articles -- and show that the performance of our algorithm outperforms those that rely only on submodularity.}
}
@inproceedings{mthashsampling2013,
  author = {Sujith Ravi},
  title = {Scalable Decipherment for Machine Translation via Hash Sampling},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (ACL)},
  url = {http://www.aclweb.org/anthology/P/P13/P13-1036.pdf},
  year = {2013},
  abstract = {In this paper, we propose a new Bayesian inference method to train statistical machine translation systems using only non-parallel corpora. Following a probabilistic decipherment approach, we first introduce a new framework for decipherment training that is flexible enough to incorporate any number/type of features (besides simple bag-of-words) as side-information used for estimating translation models. In order to perform fast, efficient Bayesian inference in this framework, we then derive a hash sampling strategy that is inspired by the work of Ahmed et al. (2012). The new translation hash sampler enables us to scale elegantly to complex models (for the first time) and large vocabulary/corpora sizes. We show empirical results on the OPUS data -- our method yields the best BLEU scores compared to existing approaches, while achieving significant computational speedups (several orders faster). We also report for the first time -- BLEU score results for a large-scale MT task using only non-parallel data (EMEA corpus).}
}
@inproceedings{eyemouse2013,
  author = {Vidhya Navalpakkam and Ladawn Jentzsch and Rory Sayres and Sujith Ravi and Amr Ahmed and Alex Smola},
  title = {Measurement and modeling of eye-mouse behavior},
  booktitle = {Proceedings of the 22nd International World Wide Web Conference (WWW)},
  url = {http://www.sravi.org/pubs/eyemouse-www2013.pdf},
  year = {2013},
  abstract = {As search pages are becoming increasingly complex, with images and nonlinear page layouts, understanding how users examine the page is important. We present a lab study on the effect of a rich informational panel to the right of the search result column, on eye and mouse behavior. Using eye and mouse data, we show that the flow of user attention on nonlinear page layouts is different from the widely believed top-down linear examination order of search results. We further demonstrate that the mouse, like the eye, is sensitive to two key attributes of page elements -- their position (layout), and their relevance to the user's task. We identify mouse measures that are strongly correlated with eye movements, and develop models to predict user attention (eye gaze) from mouse activity. These findings show that mouse tracking can be used to infer user attention and information flow patterns on search pages. Potential applications include ranking, search page optimization, and UI evaluation.}
}
@inproceedings{fastex2012,
  author = {Amr Ahmed and Sujith Ravi and Shravan Narayanamurthy and Alex Smola},
  title = {{FastEx}: Hash Clustering with Exponential Families},
  booktitle = {Proceedings of the 26th Conference on Neural Information Processing Systems (NIPS)},
  url = {http://www.sravi.org/pubs/hashclustering-nips2012.pdf},
  year = {2012},
  abstract = {Clustering is a key component in any data analysis toolbox. Despite its importance, scalable algorithms often eschew rich statistical models in favor of simpler descriptions such as k-means clustering. In this paper we present a sampler, capable of estimating mixtures of exponential families. At its heart lies a novel proposal distribution using random projections to achieve high throughput in generating proposals, which is crucial for clustering models with large numbers of clusters.}
}
@inproceedings{response2012,
  author = {Bo Pang and Sujith Ravi},
  title = {Revisiting the Predictability of Language: Response Completion in Social Media},
  booktitle = {Proceedings of the Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)},
  url = {http://aclweb.org/anthology-new/D/D12/D12-1136.pdf},
  year = {2012},
  abstract = {The question ``how predictable is English?'' has long fascinated researchers. While prior work has focused on formal English typically used in news articles, we turn to texts generated by users in online settings that are more informal in nature. We are motivated by a novel application scenario: given the difficulty of typing on mobile devices, can we help reduce typing effort with message completion, especially in conversational settings? We propose a method for automatic response completion. Our approach models both the language used in responses and the specific context provided by the original message. Our experimental results on a large-scale dataset show that both components help reduce typing effort. We also perform an information-theoretic study in this setting and examine the entropy of user-generated content, especially in conversational scenarios, to better understand predictability of user generated English.}
}
@inproceedings{ravi_knight_mtdec_acl11,
  author    = {Ravi, Sujith and Knight, Kevin},
  title     = {Deciphering Foreign Language},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies (ACL-HLT)},
  year      = {2011},
  url       = {http://aclweb.org/anthology/P/P11/P11-1002.pdf},
  slides    = {http://www.sravi.org/pubs/mt-decipherment_acl11-slides.pdf},
  abstract  = {In this work, we tackle the task of machine translation (MT) without parallel training data. We frame the MT problem as a decipherment task, treating the foreign text as a cipher for English and present novel methods for training translation models from non-parallel text.}
}
@inproceedings{ravi_knight_bayesdec_acl11,
  author = {Sujith Ravi and Kevin Knight},
  title = {{Bayesian} Inference for {Zodiac} and Other Homophonic Ciphers},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies (ACL-HLT)},
  year = {2011},
  url = {http://aclweb.org/anthology/P/P11/P11-1025.pdf},
  abstract = {We introduce a novel Bayesian approach for deciphering complex substitution ciphers. Our method uses a decipherment model which combines information from letter n-gram language models as well as word dictionaries. Bayesian inference is performed on our model using an efficient sampling technique. We evaluate the quality of the Bayesian decipherment output on simple and homophonic letter substitution ciphers and show that unlike a previous approach, our method consistently produces almost 100\% accurate decipherments. The new method can be applied on more complex substitution ciphers and we demonstrate its utility by cracking the famous Zodiac-408 cipher in a fully automated fashion, which has never been done before.}
}
@inproceedings{srlccg_ijcnlp11,
  author = {Stephen Boxwell and Chris Brew and Jason Baldridge and Dennis Mehay and Sujith Ravi},
  title = {Semantic {Role} {Labeling} for {CCG} Without Treebanks},
  booktitle = {Proceedings of the International Joint Conference on Natural Language Processing (IJCNLP)},
  year = {2011},
  url = {http://aclweb.org/anthology-new/I/I11/I11-1022.pdf},
  abstract = {We describe a method for training a semantic role labeler for CCG in the absence of gold-standard syntax derivations. Traditionally, semantic role labeling is performed by placing human-annotated semantic roles on gold-standard syntactic parses, identifying patterns in the syntax-semantics relationship, and then predicting roles on novel syntactic analyses. The gold standard syntactic training data can be eliminated from the process by extracting training instances from semantic roles projected onto a packed parse chart. This process can be used to rapidly develop NLP tools for resource-poor languages of interest.}
}
@inproceedings{kozareva-ravi_emnlp11_unsup-ws,
  author    = {Zornitsa Kozareva and Sujith Ravi},
  title     = {Unsupervised Name Ambiguity Resolution Using A Generative Model},
  booktitle = {Proceedings of the EMNLP Workshop on Unsupervised Learning in NLP},
  url       = {http://www.aclweb.org/anthology-new/W/W11/W11-2213.pdf},
  year      = {2011},
  abstract  = {Resolving ambiguity associated with names found on the Web, Wikipedia or medical texts is a very challenging task, which has been of great interest to the research community. We propose a novel approach to disambiguating names using Latent Dirichlet Allocation, where the learned topics represent the underlying senses of the ambiguous name. We conduct a detailed evaluation on multiple data sets containing ambiguous person, location and organization names and for multiple languages such as English, Spanish, Romanian and Bulgarian. We conduct comparative studies with existing approaches and show a substantial improvement of 15 to 35\% in task accuracy.}
}
@inproceedings{ravi_et_al-coling10,
  author    = {Ravi, Sujith and Vaswani, Ashish and Knight, Kevin and Chiang, David},
  title     = {Fast, Greedy Model Minimization for Unsupervised Tagging},
  booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (COLING)},
  pages     = {940--948},
  year      = {2010},
  url       = {http://aclweb.org/anthology/C/C10/C10-1106.pdf},
  abstract  = {Model minimization has been shown to work well for the task of unsupervised part-of-speech tagging with a dictionary. In Ravi and Knight (2009), the authors invoke an integer programming (IP) solver to do model minimization. However, solving this problem exactly using an integer programming formulation is intractable for practical purposes. We propose a novel two-stage greedy approximation scheme to replace the IP. Our method runs fast, while yielding highly accurate tagging results. We also compare our method against standard EM training, and show that we consistently obtain better tagging accuracies on test data of varying sizes for English and Italian.}
}
@inproceedings{ravi:baldridge:knight-acl10,
  author    = {Ravi, Sujith and Baldridge, Jason and Knight, Kevin},
  title     = {Minimized models and grammar-informed initialization for supertagging with highly ambiguous lexicons},
  booktitle = {Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics (ACL)},
  pages     = {495--503},
  year      = {2010},
  url       = {http://www.aclweb.org/anthology/P10-1051.pdf},
  abstract  = {We combine two complementary ideas for learning supertaggers from highly ambiguous lexicons: grammar-informed tag transitions and models minimized via integer programming. Each strategy on its own greatly improves performance over basic expectation-maximization training with a bitag Hidden Markov Model, which we show on the CCGbank and CCG-TUT corpora. The strategies provide further error reductions when combined. We describe a new two-stage integer programming strategy that efficiently deals with the high degree of ambiguity on these datasets while obtaining the full effect of model minimization.}
}
@article{ravi:knight-compling09,
  author   = {Ravi, Sujith and Knight, Kevin},
  title    = {Does {GIZA}++ Make Search Errors?},
  journal  = {Computational Linguistics},
  year     = {2010},
  volume   = {36},
  number   = {3},
  pages    = {295--302},
  url      = {http://www.aclweb.org/anthology/J/J10/J10-3001.pdf},
  abstract = {Word alignment is a critical procedure within statistical machine translation (SMT). Brown et al. (1993) have provided the most popular word alignment algorithm to date, one that has been implemented in GIZA (Al-Onaizan et al. 1999) and GIZA++ (Och and Ney 2003) software and adopted by nearly every SMT project. In this paper, we investigate whether this algorithm makes search errors when it computes Viterbi alignments, i.e., whether it returns alignments that are sub-optimal according to a trained model.}
}
@incollection{kim:shaw:ravi-edm-book10,
  author    = {Kim, Jihie and Shaw, Erin and Ravi, Sujith},
  title     = {Mining Student Discussions to Profile Participation and Scaffold Learning},
  editor    = {Romero, Cristobal and Ventura, Sebastian and Pechenizkiy, Mykola and Baker, Ryan},
  booktitle = {The Handbook of Educational Data Mining},
  publisher = {CRC Press},
  pages     = {299--310},
  year      = {2010}
}
@inproceedings{chiang_et_al-naacl10,
  author = {David Chiang and
                Jonathan Graehl and
                Kevin Knight and
                Adam Pauls and
                Sujith Ravi},
  title = {{Bayesian} Inference for Finite-State Transducers},
  booktitle = {Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics - Human Language Technologies (NAACL/HLT)},
  year = {2010},
  pages = {447--455},
  url = {http://aclweb.org/anthology/N/N10/N10-1068.pdf},
  abstract = {We describe a Bayesian inference algorithm that can be used to train any cascade of weighted finite-state transducers on end-to-end data. We also investigate the problem of automatically selecting from among multiple training runs. Our experiments on four different tasks demonstrate the genericity of this framework, and, where applicable, large improvements in performance over EM. We also show, for unsupervised part-of-speech tagging, that automatic run selection gives a large improvement over previous Bayesian approaches.}
}
@inproceedings{ravi_et_al-wsdm10,
  author = {Sujith Ravi and
               Andrei Z. Broder and
               Evgeniy Gabrilovich and
               Vanja Josifovski and
               Sandeep Pandey and
               Bo Pang},
  title = {Automatic generation of bid phrases for online advertising},
  booktitle = {Proceedings of the International Conference on Web Search and Data Mining (WSDM)},
  year = {2010},
  pages = {341--350},
  url = {http://research.yahoo.com/files/wsdm246-ravi.pdf},
  abstract = {One of the most prevalent online advertising methods is textual advertising. To produce a textual ad, an advertiser must craft a short creative (the text of the ad) linking to a landing page, which describes the product or service being promoted. Furthermore, the advertiser must associate the creative to a set of manually chosen bid phrases representing those Web search queries that should trigger the ad. For efficiency, given a landing page, the bid phrases are often chosen first, and then for each bid phrase the creative is produced using a template. Nevertheless, an ad campaign (e.g., for a large retailer) might involve thousands of landing pages and tens or hundreds of thousands of bid phrases, hence the entire process is very laborious.

Our study aims towards the automatic construction of online ad campaigns: given a landing page, we propose several algorithmic methods to generate bid phrases suitable for the given input. Such phrases must be both relevant (that is, reflect the content of the page) and well-formed (that is, likely to be used as queries to a Web search engine). To this end, we use a two phase approach. First, candidate bid phrases are generated by a number of methods, including a (monolingual) translation model capable of generating phrases not contained within the text of the input as well as previously ``unseen'' phrases. Second, the candidates are ranked in a probabilistic framework using both the translation model, which favors relevant phrases, as well as a bid phrase language model, which favors well-formed phrases.

Empirical evaluation based on a real-life corpus of advertiser-created landing pages and associated bid phrases confirms the value of our approach, which successfully re-generates many of the human-crafted bid phrases and performs significantly better than a pure text extraction method.}
}
@inproceedings{ravi:knight-acl09,
  author = {Ravi, Sujith and Knight, Kevin},
  title = {Minimized models for unsupervised part-of-speech tagging},
  booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the Association for Computational Linguistics and the 4th International Joint Conference on Natural Language Processing of the Asian Federation of Natural Language Processing (ACL-IJCNLP)},
  year = {2009},
  pages = {504--512},
  url = {http://aclweb.org/anthology/P/P09/P09-1057.pdf},
  note = {Nominated for the Best Paper Award},
  abstract = {We describe a novel method for the task of unsupervised POS tagging with a dictionary, one that uses integer programming to explicitly search for the smallest model that explains the data, and then uses EM to set parameter values. We evaluate our method on a standard test corpus using different standard tagsets (a 45-tagset as well as a smaller 17-tagset), and show that our approach performs better than existing state-of-the-art systems in both settings.}
}
@inproceedings{bodrumlu:knight:ravi-naacl09_ilp-ws,
  author = {Bodrumlu, Tugba and Knight, Kevin and Ravi, Sujith},
  title = {A new objective function for word alignment},
  booktitle = {Proceedings of the NAACL/HLT Workshop on Integer Linear Programming for Natural Language Processing},
  year = {2009},
  pages = {28--35},
  url = {http://aclweb.org/anthology-new/W/W09/W09-1804.pdf},
  abstract = {We develop a new objective function for word alignment that measures the size of the bilingual dictionary induced by an alignment. A word alignment that results in a small dictionary is preferred over one that results in a large dictionary. In order to search for the alignment that minimizes this objective, we cast the problem as an integer linear program. We then extend our objective function to align corpora at the sub-word level, which we demonstrate on a small Turkish-English corpus.}
}
@inproceedings{ravi:knight-naacl09,
  author = {Ravi, Sujith and Knight, Kevin},
  title = {Learning phoneme mappings for transliteration without parallel data},
  booktitle = {Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics - Human Language Technologies (NAACL/HLT)},
  year = {2009},
  pages = {37--45},
  url = {http://aclweb.org/anthology/N/N09/N09-1005.pdf},
  abstract = {We present a method for performing machine transliteration without any parallel resources. We frame the transliteration task as a decipherment problem and show that it is possible to learn cross-language phoneme mapping tables using only monolingual resources. We compare various methods and evaluate their accuracies on a standard name transliteration task.}
}
@inproceedings{ravi:knight-iccpol09,
  author = {Sujith Ravi and
               Kevin Knight},
  title = {Probabilistic Methods for a {Japanese} Syllable Cipher},
  booktitle = {Proceedings of the 22nd International Conference on the Computer Processing of Oriental Languages (ICCPOL)},
  year = {2009},
  pages = {270--281},
  url = {http://www.springerlink.com/content/r7113hp62731745t/fulltext.pdf},
  abstract = {This paper attacks a Japanese syllable-substitution cipher.
We use a probabilistic, noisy-channel framework, exploiting various Japanese
language models to drive the decipherment. We describe several innovations,
including a new objective function for searching for the highest-scoring
decipherment. We include empirical studies of the relevant phenomena,
and we give improved decipherment accuracy rates.}
}
@article{ravi:knight-cryptologia09,
  author = {Sujith Ravi and
               Kevin Knight},
  title = {Attacking Letter Substitution Ciphers with Integer Programming},
  journal = {Cryptologia},
  volume = {33},
  number = {4},
  year = {2009},
  pages = {321--334},
  doi = {10.1080/01611190903030920},
  url = {http://www.informaworld.com/10.1080/01611190903030920},
  abstract = {We introduce a method for solving substitution ciphers using low-order letter n-gram models. This method enforces global constraints using integer programming, and it guarantees that no decipherment key is overlooked. We carry out extensive empirical experiments showing how decipherment accuracy varies as a function of cipher length and n-gram order. We also make an empirical investigation of Shannon's (1949) theory of uncertainty in decipherment.}
}
@inproceedings{ravi:knight-emnlp08,
  author = {Sujith Ravi and
               Kevin Knight},
  title = {Attacking Decipherment Problems Optimally with Low-Order
               N-gram Models},
  booktitle = {Proceedings of Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year = {2008},
  pages = {812--819},
  url = {http://www.aclweb.org/anthology/D08-1085.pdf},
  abstract = {We introduce a method for solving substitution
ciphers using low-order letter n-gram
models. This method enforces global constraints
using integer programming, and it
guarantees that no decipherment key is overlooked.
We carry out extensive empirical experiments
showing how decipherment accuracy
varies as a function of cipher length and
n-gram order. We also make an empirical investigation
of Shannon's (1949) theory of uncertainty
in decipherment.}
}
@inproceedings{ravi:knight:soricut-emnlp08,
  author = {Sujith Ravi and
               Kevin Knight and
               Radu Soricut},
  title = {Automatic Prediction of Parser Accuracy},
  booktitle = {Proceedings of Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year = {2008},
  pages = {887--896},
  url = {http://www.aclweb.org/anthology/D08-1093.pdf},
  abstract = {Statistical parsers have become increasingly
accurate, to the point where they are useful in
many natural language applications. However,
estimating parsing accuracy on a wide variety
of domains and genres is still a challenge in
the absence of gold-standard parse trees.
In this paper, we propose a technique that automatically
takes into account certain characteristics
of the domains of interest, and accurately
predicts parser performance on data
from these new domains. As a result, we have
a cheap (no annotation involved) and effective
recipe for measuring the performance of a statistical
parser on any given domain.}
}
@inproceedings{ravi:pasca-cikm08,
  author = {Sujith Ravi and
               Marius Pasca},
  title = {Using structured text for large-scale attribute extraction},
  booktitle = {Proceedings of the 17th ACM Conference on Information and Knowledge Management (CIKM)},
  year = {2008},
  pages = {1183--1192},
  url = {http://dl.acm.org/citation.cfm?id=1458238},
  abstract = {We propose a weakly-supervised approach for extracting
class attributes from structured text available within Web
documents. The overall precision of the extracted attributes
is around 30\% higher than with previous methods operating
on Web documents. In addition to attribute extraction, this
approach also automatically identifies values for a subset of
the extracted class attributes.}
}
@inproceedings{kim_et_al-its08,
  author = {Jihie Kim and
               Erin Shaw and
               Sujith Ravi and
               Erin Tavano and
               Aniwat Arromratana and
               Pankaj Sarda},
  title = {Scaffolding On-Line Discussions with Past Discussions: An Analysis and Pilot Study of {PedaBot}},
  booktitle = {Proceedings of the 9th International Conference on Intelligent Tutoring Systems (ITS)},
  year = {2008},
  pages = {343--352},
  url = {http://www.springerlink.com/content/c0p21873t787q388/fulltext.pdf},
  abstract = {PedaBot is a new discussion scaffolding application designed to aid
student knowledge acquisition, promote reflection about course topics and
encourage student participation in discussions. It dynamically processes student
discussions and presents related discussions from a knowledge base of past
discussions. This paper describes the system and presents a comparative
analysis of the information retrieval techniques used to respond to free-form
student discussions, a combination of topic profiling, term frequency-inverse
document frequency, and latent semantic analysis. Responses are presented as
annotated links that students can follow and rate. We report a pilot study of
PedaBot based on student viewings, student ratings, and a small survey. Initial
results indicate that there is a high level of student interest in the feature and
that its responses are moderately relevant to student discussions.}
}
@inproceedings{ravi:kim-aied07a,
  author = {Sujith Ravi and
               Jihie Kim},
  title = {Profiling Student Interactions in Threaded Discussions with Speech Act Classifiers},
  booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence in Education (AIED)},
  year = {2007},
  pages = {357--364},
  url = {http://www.isi.edu/~jihie/papers/ThreadAssessmt-AIED2007.pdf},
  abstract = {On-line discussion is a popular form of web-based computer-mediated
communication and is an important medium for distance education. Automatic
tools for analyzing online discussions are highly desirable for better information
management and assistance. This paper presents an approach for automatically
profiling student interactions in on-line discussions. Using N-gram features and
linear SVM, we developed ``speech act'' classifiers that identify the roles that
individual messages play. The classifiers were used in finding messages that
contain questions or answers. We then applied a set of thread analysis rules for
identifying threads that may have unanswered questions and need instructor
attention. We evaluated the results with three human annotators, and 70--75\% of
the predictions from the system were consistent with human answers.}
}
@inproceedings{ravi:kim:shaw-aied07b_edm-ws,
  author    = {Sujith Ravi and Jihie Kim and Erin Shaw},
  title     = {Mining On-line Discussions: Assessing Technical Quality for Student Scaffolding and Classifying Messages for Participation Profiling},
  booktitle = {Proceedings of the Educational Data Mining Workshop in the 13th International Conference on Artificial Intelligence in Education (AIED)},
  year      = {2007},
  abstract  = {On-line collaborative discussions play an important role in distance
    education and web-enhanced courses. Automatic tools for assessing student
    activities and promoting collaborative problem solving can provide a better
    learning experience for students and also offer useful assistance to teachers. This
    paper presents two novel instructional tools that apply data mining and
    information retrieval techniques. First, we describe an approach that could be
    used to scaffold undergraduate student discussions by retrieving useful
    information from past student discussions. The tool exploits both the discussions
    from the same undergraduate course and the ones from a graduate-level course.
    The second part of the paper presents an instructional tool that profiles student
    contributions with respect to student genders and the roles that students play in
    discussion. We apply speech act classifiers that automatically identify whether the
    given message contains questions and/or answers, and use the classification
    results in profiling male and female student contributions. Our initial evaluation
    of the scaffolding tool shows that discussions from the same course contain more
    number of similar concepts than the ones from the graduate-level course. However,
    technical quality of graduate-level discussions is higher. The results from the
    profiling tool indicate that female participation in undergraduate-level discussions
    is lower than that in graduate-level discussions, and graduate female students post
    more questions and answers compared to undergraduate female students.}
}