@inproceedings{roy-dipta-vallurupalli-2024-umbclu,
title = "{UMBCLU} at {S}em{E}val-2024 Task 1: Semantic Textual Relatedness with and without machine translation",
author = "Roy Dipta, Shubhashis and
Vallurupalli, Sai",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.195",
doi = "10.18653/v1/2024.semeval-1.195",
pages = "1351--1357",
abstract = "The aim of SemEval-2024 Task 1, {``}Semantic Textual Relatedness for African and Asian Languages{''} is to develop models for identifying semantic textual relatedness (STR) between two sentences using multiple languages (14 African and Asian languages) and settings (supervised, unsupervised, and cross-lingual). Large language models (LLMs) have shown impressive performance on several natural language understanding tasks such as multilingual machine translation (MMT), semantic similarity (STS), and encoding sentence embeddings. Using a combination of LLMs that perform well on these tasks, we developed two STR models, TranSem and FineSem, for the supervised and cross-lingual settings. We explore the effectiveness of several training methods and the usefulness of machine translation. We find that direct fine-tuning on the task is comparable to using sentence embeddings and translating to English leads to better performance for some languages. In the supervised setting, our model performance is better than the official baseline for 3 languages with the remaining 4 performing on par. In the cross-lingual setting, our model performance is better than the baseline for 3 languages (leading to 1st place for Africaans and 2nd place for Indonesian), is on par for 2 languages and performs poorly on the remaining 7 languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="roy-dipta-vallurupalli-2024-umbclu">
<titleInfo>
<title>UMBCLU at SemEval-2024 Task 1: Semantic Textual Relatedness with and without machine translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubhashis</namePart>
<namePart type="family">Roy Dipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="family">Vallurupalli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The aim of SemEval-2024 Task 1, “Semantic Textual Relatedness for African and Asian Languages” is to develop models for identifying semantic textual relatedness (STR) between two sentences using multiple languages (14 African and Asian languages) and settings (supervised, unsupervised, and cross-lingual). Large language models (LLMs) have shown impressive performance on several natural language understanding tasks such as multilingual machine translation (MMT), semantic similarity (STS), and encoding sentence embeddings. Using a combination of LLMs that perform well on these tasks, we developed two STR models, TranSem and FineSem, for the supervised and cross-lingual settings. We explore the effectiveness of several training methods and the usefulness of machine translation. We find that direct fine-tuning on the task is comparable to using sentence embeddings and translating to English leads to better performance for some languages. In the supervised setting, our model performance is better than the official baseline for 3 languages with the remaining 4 performing on par. In the cross-lingual setting, our model performance is better than the baseline for 3 languages (leading to 1st place for Africaans and 2nd place for Indonesian), is on par for 2 languages and performs poorly on the remaining 7 languages.</abstract>
<identifier type="citekey">roy-dipta-vallurupalli-2024-umbclu</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.195</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.195</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1351</start>
<end>1357</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UMBCLU at SemEval-2024 Task 1: Semantic Textual Relatedness with and without machine translation
%A Roy Dipta, Shubhashis
%A Vallurupalli, Sai
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F roy-dipta-vallurupalli-2024-umbclu
%X The aim of SemEval-2024 Task 1, “Semantic Textual Relatedness for African and Asian Languages” is to develop models for identifying semantic textual relatedness (STR) between two sentences using multiple languages (14 African and Asian languages) and settings (supervised, unsupervised, and cross-lingual). Large language models (LLMs) have shown impressive performance on several natural language understanding tasks such as multilingual machine translation (MMT), semantic similarity (STS), and encoding sentence embeddings. Using a combination of LLMs that perform well on these tasks, we developed two STR models, TranSem and FineSem, for the supervised and cross-lingual settings. We explore the effectiveness of several training methods and the usefulness of machine translation. We find that direct fine-tuning on the task is comparable to using sentence embeddings and translating to English leads to better performance for some languages. In the supervised setting, our model performance is better than the official baseline for 3 languages with the remaining 4 performing on par. In the cross-lingual setting, our model performance is better than the baseline for 3 languages (leading to 1st place for Africaans and 2nd place for Indonesian), is on par for 2 languages and performs poorly on the remaining 7 languages.
%R 10.18653/v1/2024.semeval-1.195
%U https://aclanthology.org/2024.semeval-1.195
%U https://doi.org/10.18653/v1/2024.semeval-1.195
%P 1351-1357
Markdown (Informal)
[UMBCLU at SemEval-2024 Task 1: Semantic Textual Relatedness with and without machine translation](https://aclanthology.org/2024.semeval-1.195) (Roy Dipta & Vallurupalli, SemEval 2024)
ACL