@inproceedings{molfese-etal-2024-neuralign,
title = "CroCoAlign: A Cross-Lingual, Context-Aware and Fully-Neural Sentence Alignment System for Long Texts",
author = "Molfese, Francesco and
Bejgu, Andrei and
Tedeschi, Simone and
Conia, Simone and
Navigli, Roberto",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-long.135",
pages = "2209--2220",
abstract = "Sentence alignment {--} establishing links between corresponding sentences in two related documents {--} is an important NLP task with several downstream applications, such as machine translation (MT). Despite the fact that existing sentence alignment systems have achieved promising results, their effectiveness is based on auxiliary information such as document metadata or machine-generated translations, as well as hyperparameter-sensitive techniques. Moreover, these systems often overlook the crucial role that context plays in the alignment process. In this paper, we address the aforementioned issues and propose CroCoAlign: the first context-aware, end-to-end and fully neural architecture for sentence alignment. Our system maps source and target sentences in long documents by contextualizing their sentence embeddings with respect to the other sentences in the document. We extensively evaluate CroCoAlign on a multilingual dataset consisting of 20 language pairs derived from the Opus project, and demonstrate that our model achieves state-of-the-art performance. To ensure reproducibility, we release our code and model checkpoints at https://github.com/Babelscape/CroCoAlign.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="molfese-etal-2024-neuralign">
<titleInfo>
<title>CroCoAlign: A Cross-Lingual, Context-Aware and Fully-Neural Sentence Alignment System for Long Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Molfese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Bejgu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Tedeschi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Conia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentence alignment – establishing links between corresponding sentences in two related documents – is an important NLP task with several downstream applications, such as machine translation (MT). Despite the fact that existing sentence alignment systems have achieved promising results, their effectiveness is based on auxiliary information such as document metadata or machine-generated translations, as well as hyperparameter-sensitive techniques. Moreover, these systems often overlook the crucial role that context plays in the alignment process. In this paper, we address the aforementioned issues and propose CroCoAlign: the first context-aware, end-to-end and fully neural architecture for sentence alignment. Our system maps source and target sentences in long documents by contextualizing their sentence embeddings with respect to the other sentences in the document. We extensively evaluate CroCoAlign on a multilingual dataset consisting of 20 language pairs derived from the Opus project, and demonstrate that our model achieves state-of-the-art performance. To ensure reproducibility, we release our code and model checkpoints at https://github.com/Babelscape/CroCoAlign.</abstract>
<identifier type="citekey">molfese-etal-2024-neuralign</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-long.135</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>2209</start>
<end>2220</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CroCoAlign: A Cross-Lingual, Context-Aware and Fully-Neural Sentence Alignment System for Long Texts
%A Molfese, Francesco
%A Bejgu, Andrei
%A Tedeschi, Simone
%A Conia, Simone
%A Navigli, Roberto
%Y Graham, Yvette
%Y Purver, Matthew
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F molfese-etal-2024-neuralign
%X Sentence alignment – establishing links between corresponding sentences in two related documents – is an important NLP task with several downstream applications, such as machine translation (MT). Despite the fact that existing sentence alignment systems have achieved promising results, their effectiveness is based on auxiliary information such as document metadata or machine-generated translations, as well as hyperparameter-sensitive techniques. Moreover, these systems often overlook the crucial role that context plays in the alignment process. In this paper, we address the aforementioned issues and propose CroCoAlign: the first context-aware, end-to-end and fully neural architecture for sentence alignment. Our system maps source and target sentences in long documents by contextualizing their sentence embeddings with respect to the other sentences in the document. We extensively evaluate CroCoAlign on a multilingual dataset consisting of 20 language pairs derived from the Opus project, and demonstrate that our model achieves state-of-the-art performance. To ensure reproducibility, we release our code and model checkpoints at https://github.com/Babelscape/CroCoAlign.
%U https://aclanthology.org/2024.eacl-long.135
%P 2209-2220
Markdown (Informal)
[CroCoAlign: A Cross-Lingual, Context-Aware and Fully-Neural Sentence Alignment System for Long Texts](https://aclanthology.org/2024.eacl-long.135) (Molfese et al., EACL 2024)
ACL