% System-description paper for SemEval-2022 Task 8 (multilingual news similarity).
% Case-sensitive words in the title are protected with whole-word braces, and the
% abstract uses a plain ASCII apostrophe (a backtick would typeset as an opening quote).
% NOTE(review): `address` looks like the conference venue rather than the
% publisher's city -- confirm against the target bibliography style.
@inproceedings{jobanputra-martin-rodriguez-2022-oversampledml,
  title     = {{OversampledML} at {SemEval-2022} Task 8: When multilingual news similarity met Zero-shot approaches},
  author    = {Jobanputra, Mayank and
               Mart{\'i}n Rodr{\'i}guez, Lorena},
  editor    = {Emerson, Guy and
               Schluter, Natalie and
               Stanovsky, Gabriel and
               Kumar, Ritesh and
               Palmer, Alexis and
               Schneider, Nathan and
               Singh, Siddharth and
               Ratan, Shyam},
  booktitle = {Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)},
  month     = jul,
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.semeval-1.165/},
  doi       = {10.18653/v1/2022.semeval-1.165},
  pages     = {1171--1177},
  abstract  = {We investigate the capabilities of pre-trained models, without any fine-tuning, for a document-level multilingual news similarity task of SemEval-2022. We utilize title and news content with appropriate pre-processing techniques. Our system derives 14 different similarity features using a combination of state-of-the-art methods (MPNet) with well-known statistical methods (i.e. TF-IDF, Word Mover's distance). We formulate multilingual news similarity task as a regression task and approximate the overall similarity between two news articles using these features. Our best-performing system achieved a correlation score of 70.1{\%} and was ranked 20th among the 34 participating teams. In this paper, in addition to a system description, we also provide further analysis of our results and an ablation study highlighting the strengths and limitations of our features. We make our code publicly available at \url{https://github.com/cicl-iscl/multinewssimilarity}},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jobanputra-martin-rodriguez-2022-oversampledml">
<titleInfo>
<title>OversampledML at SemEval-2022 Task 8: When multilingual news similarity met Zero-shot approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mayank</namePart>
<namePart type="family">Jobanputra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lorena</namePart>
<namePart type="family">Martín Rodríguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Emerson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Ratan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the capabilities of pre-trained models, without any fine-tuning, for a document-level multilingual news similarity task of SemEval-2022. We utilize title and news content with appropriate pre-processing techniques. Our system derives 14 different similarity features using a combination of state-of-the-art methods (MPNet) with well-known statistical methods (i.e. TF-IDF, Word Mover's distance). We formulate multilingual news similarity task as a regression task and approximate the overall similarity between two news articles using these features. Our best-performing system achieved a correlation score of 70.1% and was ranked 20th among the 34 participating teams. In this paper, in addition to a system description, we also provide further analysis of our results and an ablation study highlighting the strengths and limitations of our features. We make our code publicly available at https://github.com/cicl-iscl/multinewssimilarity</abstract>
<identifier type="citekey">jobanputra-martin-rodriguez-2022-oversampledml</identifier>
<identifier type="doi">10.18653/v1/2022.semeval-1.165</identifier>
<location>
<url>https://aclanthology.org/2022.semeval-1.165/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>1171</start>
<end>1177</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T OversampledML at SemEval-2022 Task 8: When multilingual news similarity met Zero-shot approaches
%A Jobanputra, Mayank
%A Martín Rodríguez, Lorena
%Y Emerson, Guy
%Y Schluter, Natalie
%Y Stanovsky, Gabriel
%Y Kumar, Ritesh
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y Singh, Siddharth
%Y Ratan, Shyam
%S Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F jobanputra-martin-rodriguez-2022-oversampledml
%X We investigate the capabilities of pre-trained models, without any fine-tuning, for a document-level multilingual news similarity task of SemEval-2022. We utilize title and news content with appropriate pre-processing techniques. Our system derives 14 different similarity features using a combination of state-of-the-art methods (MPNet) with well-known statistical methods (i.e. TF-IDF, Word Mover's distance). We formulate multilingual news similarity task as a regression task and approximate the overall similarity between two news articles using these features. Our best-performing system achieved a correlation score of 70.1% and was ranked 20th among the 34 participating teams. In this paper, in addition to a system description, we also provide further analysis of our results and an ablation study highlighting the strengths and limitations of our features. We make our code publicly available at https://github.com/cicl-iscl/multinewssimilarity
%R 10.18653/v1/2022.semeval-1.165
%U https://aclanthology.org/2022.semeval-1.165/
%U https://doi.org/10.18653/v1/2022.semeval-1.165
%P 1171-1177
Markdown (Informal)
[OversampledML at SemEval-2022 Task 8: When multilingual news similarity met Zero-shot approaches](https://aclanthology.org/2022.semeval-1.165/) (Jobanputra & Martín Rodríguez, SemEval 2022)
ACL