@inproceedings{iordanidou-etal-2024-duth,
title = "{DUT}h at {S}em{E}val-2024 Task 6: Comparing Pre-trained Models on Sentence Similarity Evaluation for Detecting of Hallucinations and Related Observable Overgeneration Mistakes",
author = "Iordanidou, Ioanna and
Maslaris, Ioannis and
Arampatzis, Avi",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.154/",
doi = "10.18653/v1/2024.semeval-1.154",
pages = "1064--1070",
abstract = "In this paper, we present our approach toSemEval-2024 Task 6: SHROOM, a Sharedtask on Hallucinations and Related ObservableOvergeneration Mistakes, which aims to determine weather AI generated text is semanticallycorrect or incorrect. This work is a comparative study of Large Language Models (LLMs)in the context of the task, shedding light ontheir effectiveness and nuances. We present asystem that leverages pre-trained LLMs, suchas LaBSE, T5, and DistilUSE, for binary classification of given sentences into {\textquoteleft}Hallucination`or {\textquoteleft}Not Hallucination' classes by evaluatingthe model`s output against the reference correct text. Moreover, beyond utilizing labeleddatasets, our methodology integrates syntheticlabel creation in unlabeled datasets, followedby the prediction of test labels."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="iordanidou-etal-2024-duth">
<titleInfo>
<title>DUTh at SemEval-2024 Task 6: Comparing Pre-trained Models on Sentence Similarity Evaluation for Detecting of Hallucinations and Related Observable Overgeneration Mistakes</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ioanna</namePart>
<namePart type="family">Iordanidou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Maslaris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avi</namePart>
<namePart type="family">Arampatzis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our approach toSemEval-2024 Task 6: SHROOM, a Sharedtask on Hallucinations and Related ObservableOvergeneration Mistakes, which aims to determine weather AI generated text is semanticallycorrect or incorrect. This work is a comparative study of Large Language Models (LLMs)in the context of the task, shedding light ontheir effectiveness and nuances. We present asystem that leverages pre-trained LLMs, suchas LaBSE, T5, and DistilUSE, for binary classification of given sentences into ‘Hallucination‘or ‘Not Hallucination’ classes by evaluatingthe model‘s output against the reference correct text. Moreover, beyond utilizing labeleddatasets, our methodology integrates syntheticlabel creation in unlabeled datasets, followedby the prediction of test labels.</abstract>
<identifier type="citekey">iordanidou-etal-2024-duth</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.154</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.154/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1064</start>
<end>1070</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DUTh at SemEval-2024 Task 6: Comparing Pre-trained Models on Sentence Similarity Evaluation for Detecting of Hallucinations and Related Observable Overgeneration Mistakes
%A Iordanidou, Ioanna
%A Maslaris, Ioannis
%A Arampatzis, Avi
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F iordanidou-etal-2024-duth
%X In this paper, we present our approach toSemEval-2024 Task 6: SHROOM, a Sharedtask on Hallucinations and Related ObservableOvergeneration Mistakes, which aims to determine weather AI generated text is semanticallycorrect or incorrect. This work is a comparative study of Large Language Models (LLMs)in the context of the task, shedding light ontheir effectiveness and nuances. We present asystem that leverages pre-trained LLMs, suchas LaBSE, T5, and DistilUSE, for binary classification of given sentences into ‘Hallucination‘or ‘Not Hallucination’ classes by evaluatingthe model‘s output against the reference correct text. Moreover, beyond utilizing labeleddatasets, our methodology integrates syntheticlabel creation in unlabeled datasets, followedby the prediction of test labels.
%R 10.18653/v1/2024.semeval-1.154
%U https://aclanthology.org/2024.semeval-1.154/
%U https://doi.org/10.18653/v1/2024.semeval-1.154
%P 1064-1070
Markdown (Informal)
[DUTh at SemEval-2024 Task 6: Comparing Pre-trained Models on Sentence Similarity Evaluation for Detecting of Hallucinations and Related Observable Overgeneration Mistakes](https://aclanthology.org/2024.semeval-1.154/) (Iordanidou et al., SemEval 2024)
ACL