@inproceedings{kim-etal-2021-bisect,
title = "{B}i{SECT}: Learning to Split and Rephrase Sentences with Bitexts",
author = "Kim, Joongwon and
Maddela, Mounica and
Kriz, Reno and
Xu, Wei and
Callison-Burch, Chris",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.500",
doi = "10.18653/v1/2021.emnlp-main.500",
pages = "6193--6209",
abstract = "An important task in NLP applications such as sentence simplification is the ability to take a long, complex sentence and split it into shorter sentences, rephrasing as necessary. We introduce a novel dataset and a new model for this {`}split and rephrase{'} task. Our BiSECT training data consists of 1 million long English sentences paired with shorter, meaning-equivalent English sentences. We obtain these by extracting 1-2 sentence alignments in bilingual parallel corpora and then using machine translation to convert both sides of the corpus into the same language. BiSECT contains higher quality training examples than the previous Split and Rephrase corpora, with sentence splits that require more significant modifications. We categorize examples in our corpus and use these categories in a novel model that allows us to target specific regions of the input sentence to be split and edited. Moreover, we show that models trained on BiSECT can perform a wider variety of split operations and improve upon previous state-of-the-art approaches in automatic and human evaluations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2021-bisect">
<titleInfo>
<title>BiSECT: Learning to Split and Rephrase Sentences with Bitexts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joongwon</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mounica</namePart>
<namePart type="family">Maddela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reno</namePart>
<namePart type="family">Kriz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Callison-Burch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>An important task in NLP applications such as sentence simplification is the ability to take a long, complex sentence and split it into shorter sentences, rephrasing as necessary. We introduce a novel dataset and a new model for this ‘split and rephrase’ task. Our BiSECT training data consists of 1 million long English sentences paired with shorter, meaning-equivalent English sentences. We obtain these by extracting 1-2 sentence alignments in bilingual parallel corpora and then using machine translation to convert both sides of the corpus into the same language. BiSECT contains higher quality training examples than the previous Split and Rephrase corpora, with sentence splits that require more significant modifications. We categorize examples in our corpus and use these categories in a novel model that allows us to target specific regions of the input sentence to be split and edited. Moreover, we show that models trained on BiSECT can perform a wider variety of split operations and improve upon previous state-of-the-art approaches in automatic and human evaluations.</abstract>
<identifier type="citekey">kim-etal-2021-bisect</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.500</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.500</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>6193</start>
<end>6209</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BiSECT: Learning to Split and Rephrase Sentences with Bitexts
%A Kim, Joongwon
%A Maddela, Mounica
%A Kriz, Reno
%A Xu, Wei
%A Callison-Burch, Chris
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F kim-etal-2021-bisect
%X An important task in NLP applications such as sentence simplification is the ability to take a long, complex sentence and split it into shorter sentences, rephrasing as necessary. We introduce a novel dataset and a new model for this ‘split and rephrase’ task. Our BiSECT training data consists of 1 million long English sentences paired with shorter, meaning-equivalent English sentences. We obtain these by extracting 1-2 sentence alignments in bilingual parallel corpora and then using machine translation to convert both sides of the corpus into the same language. BiSECT contains higher quality training examples than the previous Split and Rephrase corpora, with sentence splits that require more significant modifications. We categorize examples in our corpus and use these categories in a novel model that allows us to target specific regions of the input sentence to be split and edited. Moreover, we show that models trained on BiSECT can perform a wider variety of split operations and improve upon previous state-of-the-art approaches in automatic and human evaluations.
%R 10.18653/v1/2021.emnlp-main.500
%U https://aclanthology.org/2021.emnlp-main.500
%U https://doi.org/10.18653/v1/2021.emnlp-main.500
%P 6193-6209
Markdown (Informal)
[BiSECT: Learning to Split and Rephrase Sentences with Bitexts](https://aclanthology.org/2021.emnlp-main.500) (Kim et al., EMNLP 2021)
ACL
- Joongwon Kim, Mounica Maddela, Reno Kriz, Wei Xu, and Chris Callison-Burch. 2021. BiSECT: Learning to Split and Rephrase Sentences with Bitexts. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 6193–6209, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.