@inproceedings{setha-aliane-2023-bilingual,
title = "Bilingual Terminology Alignment Using Contextualized Embeddings",
author = "Setha, Imene and
Aliane, Hassina",
editor = "Haddad, Amal Haddad and
Terryn, Ayla Rigouts and
Mitkov, Ruslan and
Rapp, Reinhard and
Zweigenbaum, Pierre and
Sharoff, Serge",
booktitle = "Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.contents-1.1",
pages = "1--8",
abstract = "Terminology Alignment faces big challenges in NLP because of the dynamic nature of terms. Fortunately, over these last few years, Deep Learning models showed very good progress with several NLP tasks such as multilingual data resourcing, glossary building, terminology understanding. . . etc. In this work, we propose a new method for terminology alignment from a comparable corpus (Arabic/French languages) for the Algerian culture field. We aim to improve bilingual alignment based on contextual information of a term and to create a significant term bank i.e. a bilingual Arabic-French dictionary. We propose to create word embeddings for both Arabic and French languages using ELMO model focusing on contextual features of terms. Then, we mapp those embeddings using Seq2seq model. We use multilingual-BERT and All-MiniLM-L6 as baseline mod- els to compare terminology alignment results. Lastly we study the performance of these models by applying evaluation methods. Experimentation{'}s showed quite satisfying alignment results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="setha-aliane-2023-bilingual">
<titleInfo>
<title>Bilingual Terminology Alignment Using Contextualized Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Imene</namePart>
<namePart type="family">Setha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hassina</namePart>
<namePart type="family">Aliane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amal</namePart>
<namePart type="given">Haddad</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayla</namePart>
<namePart type="given">Rigouts</namePart>
<namePart type="family">Terryn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Zweigenbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Terminology Alignment faces big challenges in NLP because of the dynamic nature of terms. Fortunately, over these last few years, Deep Learning models showed very good progress with several NLP tasks such as multilingual data resourcing, glossary building, terminology understanding. . . etc. In this work, we propose a new method for terminology alignment from a comparable corpus (Arabic/French languages) for the Algerian culture field. We aim to improve bilingual alignment based on contextual information of a term and to create a significant term bank i.e. a bilingual Arabic-French dictionary. We propose to create word embeddings for both Arabic and French languages using ELMO model focusing on contextual features of terms. Then, we mapp those embeddings using Seq2seq model. We use multilingual-BERT and All-MiniLM-L6 as baseline mod- els to compare terminology alignment results. Lastly we study the performance of these models by applying evaluation methods. Experimentation’s showed quite satisfying alignment results.</abstract>
<identifier type="citekey">setha-aliane-2023-bilingual</identifier>
<location>
<url>https://aclanthology.org/2023.contents-1.1</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>1</start>
<end>8</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bilingual Terminology Alignment Using Contextualized Embeddings
%A Setha, Imene
%A Aliane, Hassina
%Y Haddad, Amal Haddad
%Y Terryn, Ayla Rigouts
%Y Mitkov, Ruslan
%Y Rapp, Reinhard
%Y Zweigenbaum, Pierre
%Y Sharoff, Serge
%S Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F setha-aliane-2023-bilingual
%X Terminology Alignment faces big challenges in NLP because of the dynamic nature of terms. Fortunately, over these last few years, Deep Learning models showed very good progress with several NLP tasks such as multilingual data resourcing, glossary building, terminology understanding. . . etc. In this work, we propose a new method for terminology alignment from a comparable corpus (Arabic/French languages) for the Algerian culture field. We aim to improve bilingual alignment based on contextual information of a term and to create a significant term bank i.e. a bilingual Arabic-French dictionary. We propose to create word embeddings for both Arabic and French languages using ELMO model focusing on contextual features of terms. Then, we mapp those embeddings using Seq2seq model. We use multilingual-BERT and All-MiniLM-L6 as baseline mod- els to compare terminology alignment results. Lastly we study the performance of these models by applying evaluation methods. Experimentation’s showed quite satisfying alignment results.
%U https://aclanthology.org/2023.contents-1.1
%P 1-8
Markdown (Informal)
[Bilingual Terminology Alignment Using Contextualized Embeddings](https://aclanthology.org/2023.contents-1.1) (Setha & Aliane, ConTeNTS-WS 2023)
ACL
- Imene Setha and Hassina Aliane. 2023. Bilingual Terminology Alignment Using Contextualized Embeddings. In Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC), pages 1–8, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.