@inproceedings{bogoychev-chen-2021-highs,
    title = "The Highs and Lows of Simple Lexical Domain Adaptation Approaches for Neural Machine Translation",
    author = "Bogoychev, Nikolay and
      Chen, Pinzhen",
    editor = "Sedoc, Jo{\~a}o and
      Rogers, Anna and
      Rumshisky, Anna and
      Tafreshi, Shabnam",
    booktitle = "Proceedings of the Second Workshop on Insights from Negative Results in NLP",
    month = nov,
    year = "2021",
    address = "Online and Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.insights-1.12/",
    doi = "10.18653/v1/2021.insights-1.12",
    pages = "74--80",
    abstract = "Machine translation systems are vulnerable to domain mismatch, especially in a low-resource scenario. Out-of-domain translations are often of poor quality and prone to hallucinations, due to exposure bias and the decoder acting as a language model. We adopt two approaches to alleviate this problem: lexical shortlisting restricted by IBM statistical alignments, and hypothesis reranking based on similarity. The methods are computationally cheap and show success on low-resource out-of-domain test sets. However, the methods lose advantage when there is sufficient data or too great domain mismatch. This is due to both the IBM model losing its advantage over the implicitly learned neural alignment, and issues with subword segmentation of unseen words."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bogoychev-chen-2021-highs">
  <titleInfo>
    <title>The Highs and Lows of Simple Lexical Domain Adaptation Approaches for Neural Machine Translation</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Nikolay</namePart>
    <namePart type="family">Bogoychev</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Pinzhen</namePart>
    <namePart type="family">Chen</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2021-11</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the Second Workshop on Insights from Negative Results in NLP</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">João</namePart>
      <namePart type="family">Sedoc</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anna</namePart>
      <namePart type="family">Rogers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anna</namePart>
      <namePart type="family">Rumshisky</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shabnam</namePart>
      <namePart type="family">Tafreshi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Machine translation systems are vulnerable to domain mismatch, especially in a low-resource scenario. Out-of-domain translations are often of poor quality and prone to hallucinations, due to exposure bias and the decoder acting as a language model. We adopt two approaches to alleviate this problem: lexical shortlisting restricted by IBM statistical alignments, and hypothesis reranking based on similarity. The methods are computationally cheap and show success on low-resource out-of-domain test sets. However, the methods lose advantage when there is sufficient data or too great domain mismatch. This is due to both the IBM model losing its advantage over the implicitly learned neural alignment, and issues with subword segmentation of unseen words.</abstract>
  <identifier type="citekey">bogoychev-chen-2021-highs</identifier>
  <identifier type="doi">10.18653/v1/2021.insights-1.12</identifier>
  <location>
    <url>https://aclanthology.org/2021.insights-1.12/</url>
  </location>
  <part>
    <date>2021-11</date>
    <extent unit="page">
      <start>74</start>
      <end>80</end>
    </extent>
  </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Highs and Lows of Simple Lexical Domain Adaptation Approaches for Neural Machine Translation
%A Bogoychev, Nikolay
%A Chen, Pinzhen
%Y Sedoc, João
%Y Rogers, Anna
%Y Rumshisky, Anna
%Y Tafreshi, Shabnam
%S Proceedings of the Second Workshop on Insights from Negative Results in NLP
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F bogoychev-chen-2021-highs
%X Machine translation systems are vulnerable to domain mismatch, especially in a low-resource scenario. Out-of-domain translations are often of poor quality and prone to hallucinations, due to exposure bias and the decoder acting as a language model. We adopt two approaches to alleviate this problem: lexical shortlisting restricted by IBM statistical alignments, and hypothesis reranking based on similarity. The methods are computationally cheap and show success on low-resource out-of-domain test sets. However, the methods lose advantage when there is sufficient data or too great domain mismatch. This is due to both the IBM model losing its advantage over the implicitly learned neural alignment, and issues with subword segmentation of unseen words.
%R 10.18653/v1/2021.insights-1.12
%U https://aclanthology.org/2021.insights-1.12/
%U https://doi.org/10.18653/v1/2021.insights-1.12
%P 74-80
Markdown (Informal)
[The Highs and Lows of Simple Lexical Domain Adaptation Approaches for Neural Machine Translation](https://aclanthology.org/2021.insights-1.12/) (Bogoychev & Chen, insights 2021)
ACL
Nikolay Bogoychev and Pinzhen Chen. 2021. The Highs and Lows of Simple Lexical Domain Adaptation Approaches for Neural Machine Translation. In Proceedings of the Second Workshop on Insights from Negative Results in NLP, pages 74–80, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.