% BibTeX record for the paper hosted at https://aclanthology.org/2024.findings-eacl.117/
% (authors: Corral and Saralegi; volume editors: Graham and Purver).
@inproceedings{corral-saralegi-2024-morphology,
  title     = {Morphology Aware Source Term Masking for Terminology-Constrained {NMT}},
  author    = {Corral, Ander and Saralegi, Xabier},
  editor    = {Graham, Yvette and Purver, Matthew},
  booktitle = {Findings of the Association for Computational Linguistics: EACL 2024},
  month     = mar,
  year      = {2024},
  address   = {St. Julian{'}s, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-eacl.117/},
  pages     = {1676--1688},
  abstract  = {Terminology-constrained NMT systems facilitate the forced translation of domain-specific vocabulary. A notable method in this context is the {\textquotedblleft}copy-and-inflect{\textquotedblright} approach, which appends the target term lemmas of constraints to their corresponding source terms in the input sentence. In this work, we propose a novel adaptation of the {\textquotedblleft}copy-and-inflect{\textquotedblright} method, referred to as {\textquotedblleft}morph-masking{\textquotedblright}. Our method involves masking the source terms of the constraints from the input sentence while retaining essential grammatical information. Our approach is based on the hypothesis that {\textquotedblleft}copy-and-inflect{\textquotedblright} systems have access to both source and target terms, allowing them to generate the correct surface form of the constraint by either translating the source term itself or properly inflecting the target term lemma. Through extensive validation of our method in two translation directions with different levels of source morphological complexity, Basque to Spanish and English to German, we have demonstrated that {\textquotedblleft}morph-masking{\textquotedblright} is capable of providing a harder constraint signal, resulting in a notable improvement over the {\textquotedblleft}copy-and-inflect{\textquotedblright} method (up to 38{\%} in term accuracy), especially in challenging constraint scenarios.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a single paper; the citekey identifier below repeats the mods ID. -->
  <mods ID="corral-saralegi-2024-morphology">
    <titleInfo>
      <title>Morphology Aware Source Term Masking for Terminology-Constrained NMT</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Ander</namePart>
      <namePart type="family">Corral</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xabier</namePart>
      <namePart type="family">Saralegi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EACL 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yvette</namePart>
        <namePart type="family">Graham</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matthew</namePart>
        <namePart type="family">Purver</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">St. Julian’s, Malta</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Terminology-constrained NMT systems facilitate the forced translation of domain-specific vocabulary. A notable method in this context is the “copy-and-inflect” approach, which appends the target term lemmas of constraints to their corresponding source terms in the input sentence. In this work, we propose a novel adaptation of the “copy-and-inflect” method, referred to as “morph-masking”. Our method involves masking the source terms of the constraints from the input sentence while retaining essential grammatical information. Our approach is based on the hypothesis that “copy-and-inflect” systems have access to both source and target terms, allowing them to generate the correct surface form of the constraint by either translating the source term itself or properly inflecting the target term lemma. Through extensive validation of our method in two translation directions with different levels of source morphological complexity, Basque to Spanish and English to German, we have demonstrated that “morph-masking” is capable of providing a harder constraint signal, resulting in a notable improvement over the “copy-and-inflect” method (up to 38% in term accuracy), especially in challenging constraint scenarios.</abstract>
    <identifier type="citekey">corral-saralegi-2024-morphology</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-eacl.117/</url>
    </location>
    <!-- Date and page extent of the paper. -->
    <part>
      <date>2024-03</date>
      <extent unit="page">
        <start>1676</start>
        <end>1688</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Morphology Aware Source Term Masking for Terminology-Constrained NMT
%A Corral, Ander
%A Saralegi, Xabier
%Y Graham, Yvette
%Y Purver, Matthew
%S Findings of the Association for Computational Linguistics: EACL 2024
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F corral-saralegi-2024-morphology
%X Terminology-constrained NMT systems facilitate the forced translation of domain-specific vocabulary. A notable method in this context is the “copy-and-inflect” approach, which appends the target term lemmas of constraints to their corresponding source terms in the input sentence. In this work, we propose a novel adaptation of the “copy-and-inflect” method, referred to as “morph-masking”. Our method involves masking the source terms of the constraints from the input sentence while retaining essential grammatical information. Our approach is based on the hypothesis that “copy-and-inflect” systems have access to both source and target terms, allowing them to generate the correct surface form of the constraint by either translating the source term itself or properly inflecting the target term lemma. Through extensive validation of our method in two translation directions with different levels of source morphological complexity, Basque to Spanish and English to German, we have demonstrated that “morph-masking” is capable of providing a harder constraint signal, resulting in a notable improvement over the “copy-and-inflect” method (up to 38% in term accuracy), especially in challenging constraint scenarios.
%U https://aclanthology.org/2024.findings-eacl.117/
%P 1676-1688
Markdown (Informal)
[Morphology Aware Source Term Masking for Terminology-Constrained NMT](https://aclanthology.org/2024.findings-eacl.117/) (Corral & Saralegi, Findings 2024)
ACL