@article{niu-etal-2023-bridging,
title = "Bridging the Gap between Synthetic and Natural Questions via Sentence Decomposition for Semantic Parsing",
author = "Niu, Yilin and
Huang, Fei and
Liu, Wei and
Cui, Jianwei and
Wang, Bin and
Huang, Minlie",
journal = "Transactions of the Association for Computational Linguistics",
volume = "11",
year = "2023",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2023.tacl-1.22/",
doi = "10.1162/tacl_a_00552",
pages = "367--383",
abstract = "Semantic parsing maps natural language questions into logical forms, which can be executed against a knowledge base for answers. In real-world applications, the performance of a parser is often limited by the lack of training data. To facilitate zero-shot learning, data synthesis has been widely studied to automatically generate paired questions and logical forms. However, data synthesis methods can hardly cover the diverse structures in natural languages, leading to a large gap in sentence structure between synthetic and natural questions. In this paper, we propose a decomposition-based method to unify the sentence structures of questions, which benefits the generalization to natural questions. Experiments demonstrate that our method significantly improves the semantic parser trained on synthetic data (+7.9{\%} on KQA and +8.9{\%} on ComplexWebQuestions in terms of exact match accuracy). Extensive analysis demonstrates that our method can better generalize to natural questions with novel text expressions compared with baselines. Besides semantic parsing, our idea potentially benefits other semantic understanding tasks by mitigating the distracting structure features. To illustrate this, we extend our method to the task of sentence embedding learning, and observe substantial improvements on sentence retrieval (+13.1{\%} for Hit@1)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="niu-etal-2023-bridging">
<titleInfo>
<title>Bridging the Gap between Synthetic and Natural Questions via Sentence Decomposition for Semantic Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yilin</namePart>
<namePart type="family">Niu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianwei</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minlie</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Semantic parsing maps natural language questions into logical forms, which can be executed against a knowledge base for answers. In real-world applications, the performance of a parser is often limited by the lack of training data. To facilitate zero-shot learning, data synthesis has been widely studied to automatically generate paired questions and logical forms. However, data synthesis methods can hardly cover the diverse structures in natural languages, leading to a large gap in sentence structure between synthetic and natural questions. In this paper, we propose a decomposition-based method to unify the sentence structures of questions, which benefits the generalization to natural questions. Experiments demonstrate that our method significantly improves the semantic parser trained on synthetic data (+7.9% on KQA and +8.9% on ComplexWebQuestions in terms of exact match accuracy). Extensive analysis demonstrates that our method can better generalize to natural questions with novel text expressions compared with baselines. Besides semantic parsing, our idea potentially benefits other semantic understanding tasks by mitigating the distracting structure features. To illustrate this, we extend our method to the task of sentence embedding learning, and observe substantial improvements on sentence retrieval (+13.1% for Hit@1).</abstract>
<identifier type="citekey">niu-etal-2023-bridging</identifier>
<identifier type="doi">10.1162/tacl_a_00552</identifier>
<location>
<url>https://aclanthology.org/2023.tacl-1.22/</url>
</location>
<part>
<date>2023</date>
<detail type="volume"><number>11</number></detail>
<extent unit="page">
<start>367</start>
<end>383</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Bridging the Gap between Synthetic and Natural Questions via Sentence Decomposition for Semantic Parsing
%A Niu, Yilin
%A Huang, Fei
%A Liu, Wei
%A Cui, Jianwei
%A Wang, Bin
%A Huang, Minlie
%J Transactions of the Association for Computational Linguistics
%D 2023
%V 11
%I MIT Press
%C Cambridge, MA
%F niu-etal-2023-bridging
%X Semantic parsing maps natural language questions into logical forms, which can be executed against a knowledge base for answers. In real-world applications, the performance of a parser is often limited by the lack of training data. To facilitate zero-shot learning, data synthesis has been widely studied to automatically generate paired questions and logical forms. However, data synthesis methods can hardly cover the diverse structures in natural languages, leading to a large gap in sentence structure between synthetic and natural questions. In this paper, we propose a decomposition-based method to unify the sentence structures of questions, which benefits the generalization to natural questions. Experiments demonstrate that our method significantly improves the semantic parser trained on synthetic data (+7.9% on KQA and +8.9% on ComplexWebQuestions in terms of exact match accuracy). Extensive analysis demonstrates that our method can better generalize to natural questions with novel text expressions compared with baselines. Besides semantic parsing, our idea potentially benefits other semantic understanding tasks by mitigating the distracting structure features. To illustrate this, we extend our method to the task of sentence embedding learning, and observe substantial improvements on sentence retrieval (+13.1% for Hit@1).
%R 10.1162/tacl_a_00552
%U https://aclanthology.org/2023.tacl-1.22/
%U https://doi.org/10.1162/tacl_a_00552
%P 367-383
Markdown (Informal)
[Bridging the Gap between Synthetic and Natural Questions via Sentence Decomposition for Semantic Parsing](https://aclanthology.org/2023.tacl-1.22/) (Niu et al., TACL 2023)
ACL