@inproceedings{ronningstad-etal-2024-gpt,
    title = "A {GPT} among Annotators: {LLM}-based Entity-Level Sentiment Annotation",
    author = "R{\o}nningstad, Egil and
      Velldal, Erik and
      {\O}vrelid, Lilja",
    editor = "Henning, Sophie and
      Stede, Manfred",
    booktitle = "Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)",
    month = mar,
    year = "2024",
    address = "St. Julians, Malta",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.law-1.13/",
    pages = "133--139",
    abstract = "We investigate annotator variation for the novel task of Entity-Level Sentiment Analysis (ELSA) which annotates the aggregated sentiment directed towards volitional entities in a text. More specifically, we analyze the annotations of a newly constructed Norwegian ELSA dataset and release additional data with each annotator's labels for the 247 entities in the dataset's test split. We also perform a number of experiments prompting ChatGPT for these sentiment labels regarding each entity in the text and compare the generated annotations with the human labels. Cohen's Kappa for agreement between the best LLM-generated labels and curated gold was 0.425, which indicates that these labels would not have high quality. Our analyses further investigate the errors that ChatGPT outputs, and compare them with the variations that we find among the 5 trained annotators that all annotated the same test data."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ronningstad-etal-2024-gpt">
<titleInfo>
<title>A GPT among Annotators: LLM-based Entity-Level Sentiment Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Egil</namePart>
<namePart type="family">Rønningstad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erik</namePart>
<namePart type="family">Velldal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lilja</namePart>
<namePart type="family">Øvrelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sophie</namePart>
<namePart type="family">Henning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manfred</namePart>
<namePart type="family">Stede</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate annotator variation for the novel task of Entity-Level Sentiment Analysis (ELSA) which annotates the aggregated sentiment directed towards volitional entities in a text. More specifically, we analyze the annotations of a newly constructed Norwegian ELSA dataset and release additional data with each annotator‘s labels for the 247 entities in the dataset‘s test split. We also perform a number of experiments prompting ChatGPT for these sentiment labels regarding each entity in the text and compare the generated annotations with the human labels. Cohen‘s Kappa for agreement between the best LLM-generated labels and curated gold was 0.425, which indicates that these labels would not have high quality. Our analyses further investigate the errors that ChatGPT outputs, and compare them with the variations that we find among the 5 trained annotators that all annotated the same test data.</abstract>
<identifier type="citekey">ronningstad-etal-2024-gpt</identifier>
<location>
<url>https://aclanthology.org/2024.law-1.13/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>133</start>
<end>139</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A GPT among Annotators: LLM-based Entity-Level Sentiment Annotation
%A Rønningstad, Egil
%A Velldal, Erik
%A Øvrelid, Lilja
%Y Henning, Sophie
%Y Stede, Manfred
%S Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F ronningstad-etal-2024-gpt
%X We investigate annotator variation for the novel task of Entity-Level Sentiment Analysis (ELSA) which annotates the aggregated sentiment directed towards volitional entities in a text. More specifically, we analyze the annotations of a newly constructed Norwegian ELSA dataset and release additional data with each annotator's labels for the 247 entities in the dataset's test split. We also perform a number of experiments prompting ChatGPT for these sentiment labels regarding each entity in the text and compare the generated annotations with the human labels. Cohen's Kappa for agreement between the best LLM-generated labels and curated gold was 0.425, which indicates that these labels would not have high quality. Our analyses further investigate the errors that ChatGPT outputs, and compare them with the variations that we find among the 5 trained annotators that all annotated the same test data.
%U https://aclanthology.org/2024.law-1.13/
%P 133-139
Markdown (Informal)
[A GPT among Annotators: LLM-based Entity-Level Sentiment Annotation](https://aclanthology.org/2024.law-1.13/) (Rønningstad et al., LAW 2024)
ACL
Egil Rønningstad, Erik Velldal, and Lilja Øvrelid. 2024. A GPT among Annotators: LLM-based Entity-Level Sentiment Annotation. In Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII), pages 133–139, St. Julians, Malta. Association for Computational Linguistics.