@inproceedings{courtland-etal-2020-efficient,
title = "Efficient Automatic Punctuation Restoration Using Bidirectional Transformers with Robust Inference",
author = "Courtland, Maury and
Faulkner, Adam and
McElvain, Gayle",
editor = {Federico, Marcello and
Waibel, Alex and
Knight, Kevin and
Nakamura, Satoshi and
Ney, Hermann and
Niehues, Jan and
St{\"u}ker, Sebastian and
Wu, Dekai and
Mariani, Joseph and
Yvon, Francois},
booktitle = "Proceedings of the 17th International Conference on Spoken Language Translation",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.iwslt-1.33",
doi = "10.18653/v1/2020.iwslt-1.33",
pages = "272--279",
abstract = "Though people rarely speak in complete sentences, punctuation confers many benefits to the readers of transcribed speech. Unfortunately, most ASR systems do not produce punctuated output. To address this, we propose a solution for automatic punctuation that is both cost efficient and easy to train. Our solution benefits from the recent trend in fine-tuning transformer-based language models. We also modify the typical framing of this task by predicting punctuation for sequences rather than individual tokens, which makes for more efficient training and inference. Finally, we find that aggregating predictions across multiple context windows improves accuracy even further. Our best model achieves a new state of the art on benchmark data (TED Talks) with a combined F1 of 83.9, representing a 48.7{\%} relative improvement (15.3 absolute) over the previous state of the art.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="courtland-etal-2020-efficient">
<titleInfo>
<title>Efficient Automatic Punctuation Restoration Using Bidirectional Transformers with Robust Inference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maury</namePart>
<namePart type="family">Courtland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Faulkner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gayle</namePart>
<namePart type="family">McElvain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Spoken Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Knight</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hermann</namePart>
<namePart type="family">Ney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francois</namePart>
<namePart type="family">Yvon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Though people rarely speak in complete sentences, punctuation confers many benefits to the readers of transcribed speech. Unfortunately, most ASR systems do not produce punctuated output. To address this, we propose a solution for automatic punctuation that is both cost efficient and easy to train. Our solution benefits from the recent trend in fine-tuning transformer-based language models. We also modify the typical framing of this task by predicting punctuation for sequences rather than individual tokens, which makes for more efficient training and inference. Finally, we find that aggregating predictions across multiple context windows improves accuracy even further. Our best model achieves a new state of the art on benchmark data (TED Talks) with a combined F1 of 83.9, representing a 48.7% relative improvement (15.3 absolute) over the previous state of the art.</abstract>
<identifier type="citekey">courtland-etal-2020-efficient</identifier>
<identifier type="doi">10.18653/v1/2020.iwslt-1.33</identifier>
<location>
<url>https://aclanthology.org/2020.iwslt-1.33</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>272</start>
<end>279</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Efficient Automatic Punctuation Restoration Using Bidirectional Transformers with Robust Inference
%A Courtland, Maury
%A Faulkner, Adam
%A McElvain, Gayle
%Y Federico, Marcello
%Y Waibel, Alex
%Y Knight, Kevin
%Y Nakamura, Satoshi
%Y Ney, Hermann
%Y Niehues, Jan
%Y Stüker, Sebastian
%Y Wu, Dekai
%Y Mariani, Joseph
%Y Yvon, Francois
%S Proceedings of the 17th International Conference on Spoken Language Translation
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F courtland-etal-2020-efficient
%X Though people rarely speak in complete sentences, punctuation confers many benefits to the readers of transcribed speech. Unfortunately, most ASR systems do not produce punctuated output. To address this, we propose a solution for automatic punctuation that is both cost efficient and easy to train. Our solution benefits from the recent trend in fine-tuning transformer-based language models. We also modify the typical framing of this task by predicting punctuation for sequences rather than individual tokens, which makes for more efficient training and inference. Finally, we find that aggregating predictions across multiple context windows improves accuracy even further. Our best model achieves a new state of the art on benchmark data (TED Talks) with a combined F1 of 83.9, representing a 48.7% relative improvement (15.3 absolute) over the previous state of the art.
%R 10.18653/v1/2020.iwslt-1.33
%U https://aclanthology.org/2020.iwslt-1.33
%U https://doi.org/10.18653/v1/2020.iwslt-1.33
%P 272-279
Markdown (Informal)
[Efficient Automatic Punctuation Restoration Using Bidirectional Transformers with Robust Inference](https://aclanthology.org/2020.iwslt-1.33) (Courtland et al., IWSLT 2020)
ACL