@inproceedings{petukhova-etal-2024-petkaz,
title = "{P}et{K}az at {S}em{E}val-2024 Task 8: Can Linguistics Capture the Specifics of {LLM}-generated Text?",
author = "Petukhova, Kseniia and
Kazakov, Roman and
Kochmar, Ekaterina",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.166/",
doi = "10.18653/v1/2024.semeval-1.166",
pages = "1140--1147",
abstract = "In this paper, we present our submission to the SemEval-2024 Task 8 {\textquotedblleft}Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection{\textquotedblright}, focusing on the detection of machine-generated texts (MGTs) in English. Specifically, our approach relies on combining embeddings from the RoBERTa-base with diversity features and uses a resampled training set. We score 16th from 139 in the ranking for Subtask A, and our results show that our approach is generalizable across unseen models and domains, achieving an accuracy of 0.91."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="petukhova-etal-2024-petkaz">
<titleInfo>
<title>PetKaz at SemEval-2024 Task 8: Can Linguistics Capture the Specifics of LLM-generated Text?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kseniia</namePart>
<namePart type="family">Petukhova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Kazakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our submission to the SemEval-2024 Task 8 “Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection”, focusing on the detection of machine-generated texts (MGTs) in English. Specifically, our approach relies on combining embeddings from the RoBERTa-base with diversity features and uses a resampled training set. We score 16th from 139 in the ranking for Subtask A, and our results show that our approach is generalizable across unseen models and domains, achieving an accuracy of 0.91.</abstract>
<identifier type="citekey">petukhova-etal-2024-petkaz</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.166</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.166/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1140</start>
<end>1147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PetKaz at SemEval-2024 Task 8: Can Linguistics Capture the Specifics of LLM-generated Text?
%A Petukhova, Kseniia
%A Kazakov, Roman
%A Kochmar, Ekaterina
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F petukhova-etal-2024-petkaz
%X In this paper, we present our submission to the SemEval-2024 Task 8 “Multigenerator, Multidomain, and Multilingual Black-Box Machine-Generated Text Detection”, focusing on the detection of machine-generated texts (MGTs) in English. Specifically, our approach relies on combining embeddings from the RoBERTa-base with diversity features and uses a resampled training set. We score 16th from 139 in the ranking for Subtask A, and our results show that our approach is generalizable across unseen models and domains, achieving an accuracy of 0.91.
%R 10.18653/v1/2024.semeval-1.166
%U https://aclanthology.org/2024.semeval-1.166/
%U https://doi.org/10.18653/v1/2024.semeval-1.166
%P 1140-1147
Markdown (Informal)
[PetKaz at SemEval-2024 Task 8: Can Linguistics Capture the Specifics of LLM-generated Text?](https://aclanthology.org/2024.semeval-1.166/) (Petukhova et al., SemEval 2024)
ACL