@article{ye-etal-2020-sketch,
title = "Sketch-Driven Regular Expression Generation from Natural Language and Examples",
author = "Ye, Xi and
Chen, Qiaochu and
Wang, Xinyu and
Dillig, Isil and
Durrett, Greg",
editor = "Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "8",
year = "2020",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2020.tacl-1.44",
doi = "10.1162/tacl_a_00339",
pages = "679--694",
abstract = "Recent systems for converting natural language descriptions into regular expressions (regexes) have achieved some success, but typically deal with short, formulaic text and can only produce simple regexes. Real-world regexes are complex, hard to describe with brief sentences, and sometimes require examples to fully convey the user{'}s intent. We present a framework for regex synthesis in this setting where both natural language (NL) and examples are available. First, a semantic parser (either grammar-based or neural) maps the natural language description into an intermediate sketch, which is an incomplete regex containing holes to denote missing components. Then a program synthesizer searches over the regex space defined by the sketch and finds a regex that is consistent with the given string examples. Our semantic parser can be trained purely from weak supervision based on correctness of the synthesized regex, or it can leverage heuristically derived sketches. We evaluate on two prior datasets (Kushman and Barzilay 2013; Locascio et al. 2016) and a real-world dataset from Stack Overflow. Our system achieves state-of-the-art performance on the prior datasets and solves 57{\%} of the real-world dataset, which existing neural systems completely fail on.1",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ye-etal-2020-sketch">
<titleInfo>
<title>Sketch-Driven Regular Expression Generation from Natural Language and Examples</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xi</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiaochu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinyu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isil</namePart>
<namePart type="family">Dillig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Recent systems for converting natural language descriptions into regular expressions (regexes) have achieved some success, but typically deal with short, formulaic text and can only produce simple regexes. Real-world regexes are complex, hard to describe with brief sentences, and sometimes require examples to fully convey the user’s intent. We present a framework for regex synthesis in this setting where both natural language (NL) and examples are available. First, a semantic parser (either grammar-based or neural) maps the natural language description into an intermediate sketch, which is an incomplete regex containing holes to denote missing components. Then a program synthesizer searches over the regex space defined by the sketch and finds a regex that is consistent with the given string examples. Our semantic parser can be trained purely from weak supervision based on correctness of the synthesized regex, or it can leverage heuristically derived sketches. We evaluate on two prior datasets (Kushman and Barzilay 2013; Locascio et al. 2016) and a real-world dataset from Stack Overflow. Our system achieves state-of-the-art performance on the prior datasets and solves 57% of the real-world dataset, which existing neural systems completely fail on.1</abstract>
<identifier type="citekey">ye-etal-2020-sketch</identifier>
<identifier type="doi">10.1162/tacl_a_00339</identifier>
<location>
<url>https://aclanthology.org/2020.tacl-1.44</url>
</location>
<part>
<date>2020</date>
<detail type="volume"><number>8</number></detail>
<extent unit="page">
<start>679</start>
<end>694</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Sketch-Driven Regular Expression Generation from Natural Language and Examples
%A Ye, Xi
%A Chen, Qiaochu
%A Wang, Xinyu
%A Dillig, Isil
%A Durrett, Greg
%J Transactions of the Association for Computational Linguistics
%D 2020
%V 8
%I MIT Press
%C Cambridge, MA
%F ye-etal-2020-sketch
%X Recent systems for converting natural language descriptions into regular expressions (regexes) have achieved some success, but typically deal with short, formulaic text and can only produce simple regexes. Real-world regexes are complex, hard to describe with brief sentences, and sometimes require examples to fully convey the user’s intent. We present a framework for regex synthesis in this setting where both natural language (NL) and examples are available. First, a semantic parser (either grammar-based or neural) maps the natural language description into an intermediate sketch, which is an incomplete regex containing holes to denote missing components. Then a program synthesizer searches over the regex space defined by the sketch and finds a regex that is consistent with the given string examples. Our semantic parser can be trained purely from weak supervision based on correctness of the synthesized regex, or it can leverage heuristically derived sketches. We evaluate on two prior datasets (Kushman and Barzilay 2013; Locascio et al. 2016) and a real-world dataset from Stack Overflow. Our system achieves state-of-the-art performance on the prior datasets and solves 57% of the real-world dataset, which existing neural systems completely fail on.1
%R 10.1162/tacl_a_00339
%U https://aclanthology.org/2020.tacl-1.44
%U https://doi.org/10.1162/tacl_a_00339
%P 679-694
Markdown (Informal)
[Sketch-Driven Regular Expression Generation from Natural Language and Examples](https://aclanthology.org/2020.tacl-1.44) (Ye et al., TACL 2020)
ACL