@inproceedings{prabhu-kann-2020-making,
    title = "Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies",
    author = "Prabhu, Nikhil and
      Kann, Katharina",
    editor = "Shmueli, Boaz and
      Huang, Yin Jou",
    booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop",
    month = dec,
    year = "2020",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.aacl-srw.13",
    doi = "10.18653/v1/2020.aacl-srw.13",
    pages = "85--92",
    abstract = "Explicit mechanisms for copying have improved the performance of neural models for sequence-to-sequence tasks in the low-resource setting. However, they rely on an overlap between source and target vocabularies. Here, we propose a model that does not: a pointer-generator transformer for disjoint vocabularies. We apply our model to a low-resource version of the grapheme-to-phoneme conversion (G2P) task, and show that it outperforms a standard transformer by an average of 5.1 WER over 15 languages. While our model does not beat the best-performing baseline, we demonstrate that it provides complementary information to it: an oracle that combines the best outputs of the two models improves over the strongest baseline by 7.7 WER on average in the low-resource setting. In the high-resource setting, our model performs comparably to a standard transformer.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prabhu-kann-2020-making">
    <titleInfo>
        <title>Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Nikhil</namePart>
        <namePart type="family">Prabhu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Katharina</namePart>
        <namePart type="family">Kann</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Boaz</namePart>
            <namePart type="family">Shmueli</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Yin</namePart>
            <namePart type="given">Jou</namePart>
            <namePart type="family">Huang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Suzhou, China</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Explicit mechanisms for copying have improved the performance of neural models for sequence-to-sequence tasks in the low-resource setting. However, they rely on an overlap between source and target vocabularies. Here, we propose a model that does not: a pointer-generator transformer for disjoint vocabularies. We apply our model to a low-resource version of the grapheme-to-phoneme conversion (G2P) task, and show that it outperforms a standard transformer by an average of 5.1 WER over 15 languages. While our model does not beat the best-performing baseline, we demonstrate that it provides complementary information to it: an oracle that combines the best outputs of the two models improves over the strongest baseline by 7.7 WER on average in the low-resource setting. In the high-resource setting, our model performs comparably to a standard transformer.</abstract>
    <identifier type="citekey">prabhu-kann-2020-making</identifier>
    <identifier type="doi">10.18653/v1/2020.aacl-srw.13</identifier>
    <location>
        <url>https://aclanthology.org/2020.aacl-srw.13</url>
    </location>
    <part>
        <date>2020-12</date>
        <extent unit="page">
            <start>85</start>
            <end>92</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies
%A Prabhu, Nikhil
%A Kann, Katharina
%Y Shmueli, Boaz
%Y Huang, Yin Jou
%S Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F prabhu-kann-2020-making
%X Explicit mechanisms for copying have improved the performance of neural models for sequence-to-sequence tasks in the low-resource setting. However, they rely on an overlap between source and target vocabularies. Here, we propose a model that does not: a pointer-generator transformer for disjoint vocabularies. We apply our model to a low-resource version of the grapheme-to-phoneme conversion (G2P) task, and show that it outperforms a standard transformer by an average of 5.1 WER over 15 languages. While our model does not beat the best-performing baseline, we demonstrate that it provides complementary information to it: an oracle that combines the best outputs of the two models improves over the strongest baseline by 7.7 WER on average in the low-resource setting. In the high-resource setting, our model performs comparably to a standard transformer.
%R 10.18653/v1/2020.aacl-srw.13
%U https://aclanthology.org/2020.aacl-srw.13
%U https://doi.org/10.18653/v1/2020.aacl-srw.13
%P 85-92
Markdown (Informal):
[Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies](https://aclanthology.org/2020.aacl-srw.13) (Prabhu & Kann, AACL 2020)

ACL:
Nikhil Prabhu and Katharina Kann. 2020. Making a Point: Pointer-Generator Transformers for Disjoint Vocabularies. In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop, pages 85–92, Suzhou, China. Association for Computational Linguistics.
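For context on the mechanism the abstract references: the paper builds on the pointer-generator idea (See et al., 2017), where the output distribution is a gated mixture of a generation distribution over the target vocabulary and a copy distribution obtained by scattering the encoder attention weights onto source token ids. Below is a minimal NumPy sketch of that standard baseline mixture only; the function name and toy numbers are illustrative, and this is not the paper's disjoint-vocabulary variant.

```python
import numpy as np

def pointer_generator_mixture(p_vocab, attention, src_ids, vocab_size, p_gen):
    """Standard pointer-generator output distribution (See et al., 2017).

    p_vocab   : (vocab_size,) generation distribution from the decoder
    attention : (src_len,)    attention weights over source positions
    src_ids   : (src_len,)    vocabulary ids of the source tokens
    p_gen     : float in [0, 1], gate favouring generation over copying
    """
    p_copy = np.zeros(vocab_size)
    # Scatter attention mass onto the vocabulary entries of the source
    # tokens; np.add.at accumulates correctly over repeated ids.
    np.add.at(p_copy, src_ids, attention)
    return p_gen * p_vocab + (1.0 - p_gen) * p_copy

# Toy example: 3 source tokens, vocabulary of size 6.
p_vocab = np.array([0.1, 0.4, 0.2, 0.1, 0.1, 0.1])
attention = np.array([0.7, 0.2, 0.1])
src_ids = np.array([2, 5, 2])  # note the repeated source token
print(pointer_generator_mixture(p_vocab, attention, src_ids, 6, p_gen=0.6))
```

The scatter step is exactly where the overlap assumption bites: with disjoint source and target vocabularies (graphemes vs. phonemes in G2P), source ids do not index into the target vocabulary at all, which is the limitation the paper's model is designed to remove.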