@inproceedings{fomicheva-etal-2022-translation,
title = "Translation Error Detection as Rationale Extraction",
author = "Fomicheva, Marina and
Specia, Lucia and
Aletras, Nikolaos",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-acl.327",
doi = "10.18653/v1/2022.findings-acl.327",
pages = "4148--4159",
abstract = "Recent Quality Estimation (QE) models based on multilingual pre-trained representations have achieved very competitive results in predicting the overall quality of translated sentences. However, detecting specifically which translated words are incorrect is a more challenging task, especially when dealing with limited amounts of training data. We hypothesize that, not unlike humans, successful QE models rely on translation errors to predict overall sentence quality. By exploring a set of feature attribution methods that assign relevance scores to the inputs to explain model predictions, we study the behaviour of state-of-the-art sentence-level QE models and show that explanations (i.e. rationales) extracted from these models can indeed be used to detect translation errors. We therefore (i) introduce a novel semi-supervised method for word-level QE; and (ii) propose to use the QE task as a new benchmark for evaluating the plausibility of feature attribution, i.e. how interpretable model explanations are to humans.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fomicheva-etal-2022-translation">
<titleInfo>
<title>Translation Error Detection as Rationale Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Fomicheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent Quality Estimation (QE) models based on multilingual pre-trained representations have achieved very competitive results in predicting the overall quality of translated sentences. However, detecting specifically which translated words are incorrect is a more challenging task, especially when dealing with limited amounts of training data. We hypothesize that, not unlike humans, successful QE models rely on translation errors to predict overall sentence quality. By exploring a set of feature attribution methods that assign relevance scores to the inputs to explain model predictions, we study the behaviour of state-of-the-art sentence-level QE models and show that explanations (i.e. rationales) extracted from these models can indeed be used to detect translation errors. We therefore (i) introduce a novel semi-supervised method for word-level QE; and (ii) propose to use the QE task as a new benchmark for evaluating the plausibility of feature attribution, i.e. how interpretable model explanations are to humans.</abstract>
<identifier type="citekey">fomicheva-etal-2022-translation</identifier>
<identifier type="doi">10.18653/v1/2022.findings-acl.327</identifier>
<location>
<url>https://aclanthology.org/2022.findings-acl.327</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>4148</start>
<end>4159</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Translation Error Detection as Rationale Extraction
%A Fomicheva, Marina
%A Specia, Lucia
%A Aletras, Nikolaos
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F fomicheva-etal-2022-translation
%X Recent Quality Estimation (QE) models based on multilingual pre-trained representations have achieved very competitive results in predicting the overall quality of translated sentences. However, detecting specifically which translated words are incorrect is a more challenging task, especially when dealing with limited amounts of training data. We hypothesize that, not unlike humans, successful QE models rely on translation errors to predict overall sentence quality. By exploring a set of feature attribution methods that assign relevance scores to the inputs to explain model predictions, we study the behaviour of state-of-the-art sentence-level QE models and show that explanations (i.e. rationales) extracted from these models can indeed be used to detect translation errors. We therefore (i) introduce a novel semi-supervised method for word-level QE; and (ii) propose to use the QE task as a new benchmark for evaluating the plausibility of feature attribution, i.e. how interpretable model explanations are to humans.
%R 10.18653/v1/2022.findings-acl.327
%U https://aclanthology.org/2022.findings-acl.327
%U https://doi.org/10.18653/v1/2022.findings-acl.327
%P 4148-4159
Markdown (Informal)
[Translation Error Detection as Rationale Extraction](https://aclanthology.org/2022.findings-acl.327) (Fomicheva et al., Findings 2022)
ACL
- Marina Fomicheva, Lucia Specia, and Nikolaos Aletras. 2022. Translation Error Detection as Rationale Extraction. In Findings of the Association for Computational Linguistics: ACL 2022, pages 4148–4159, Dublin, Ireland. Association for Computational Linguistics.