@inproceedings{baric-etal-2024-actor,
title = "Actor Identification in Discourse: A Challenge for {LLM}s?",
author = "Bari{\'c}, Ana and
Pad{\'o}, Sebastian and
Papay, Sean",
editor = "Strube, Michael and
Braud, Chloe and
Hardmeier, Christian and
Li, Junyi Jessy and
Loaiciga, Sharid and
Zeldes, Amir and
Li, Chuyuan",
booktitle = "Proceedings of the 5th Workshop on Computational Approaches to Discourse (CODI 2024)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.codi-1.6",
pages = "64--70",
abstract = "The identification of political actors who put forward claims in public debate is a crucial step in the construction of discourse networks, which are helpful to analyze societal debates. Actor identification is, however, rather challenging: Often, the locally mentioned speaker of a claim is only a pronoun ({``}He proposed that [claim]{''}), so recovering the canonical actor name requires discourse understanding. We compare a traditional pipeline of dedicated NLP components (similar to those applied to the related task of coreference) with a LLM, which appears a good match for this generation task. Evaluating on a corpus of German actors in newspaper reports, we find surprisingly that the LLM performs worse. Further analysis reveals that the LLM is very good at identifying the right reference, but struggles to generate the correct canonical form. This points to an underlying issue in LLMs with controlling generated output. Indeed, a hybrid model combining the LLM with a classifier to normalize its output substantially outperforms both initial models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="baric-etal-2024-actor">
<titleInfo>
<title>Actor Identification in Discourse: A Challenge for LLMs?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Barić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Padó</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">Papay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Computational Approaches to Discourse (CODI 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Strube</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chloe</namePart>
<namePart type="family">Braud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Hardmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharid</namePart>
<namePart type="family">Loaiciga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zeldes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuyuan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The identification of political actors who put forward claims in public debate is a crucial step in the construction of discourse networks, which are helpful to analyze societal debates. Actor identification is, however, rather challenging: Often, the locally mentioned speaker of a claim is only a pronoun (“He proposed that [claim]”), so recovering the canonical actor name requires discourse understanding. We compare a traditional pipeline of dedicated NLP components (similar to those applied to the related task of coreference) with a LLM, which appears a good match for this generation task. Evaluating on a corpus of German actors in newspaper reports, we find surprisingly that the LLM performs worse. Further analysis reveals that the LLM is very good at identifying the right reference, but struggles to generate the correct canonical form. This points to an underlying issue in LLMs with controlling generated output. Indeed, a hybrid model combining the LLM with a classifier to normalize its output substantially outperforms both initial models.</abstract>
<identifier type="citekey">baric-etal-2024-actor</identifier>
<location>
<url>https://aclanthology.org/2024.codi-1.6</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>64</start>
<end>70</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Actor Identification in Discourse: A Challenge for LLMs?
%A Barić, Ana
%A Padó, Sebastian
%A Papay, Sean
%Y Strube, Michael
%Y Braud, Chloe
%Y Hardmeier, Christian
%Y Li, Junyi Jessy
%Y Loaiciga, Sharid
%Y Zeldes, Amir
%Y Li, Chuyuan
%S Proceedings of the 5th Workshop on Computational Approaches to Discourse (CODI 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F baric-etal-2024-actor
%X The identification of political actors who put forward claims in public debate is a crucial step in the construction of discourse networks, which are helpful to analyze societal debates. Actor identification is, however, rather challenging: Often, the locally mentioned speaker of a claim is only a pronoun (“He proposed that [claim]”), so recovering the canonical actor name requires discourse understanding. We compare a traditional pipeline of dedicated NLP components (similar to those applied to the related task of coreference) with a LLM, which appears a good match for this generation task. Evaluating on a corpus of German actors in newspaper reports, we find surprisingly that the LLM performs worse. Further analysis reveals that the LLM is very good at identifying the right reference, but struggles to generate the correct canonical form. This points to an underlying issue in LLMs with controlling generated output. Indeed, a hybrid model combining the LLM with a classifier to normalize its output substantially outperforms both initial models.
%U https://aclanthology.org/2024.codi-1.6
%P 64-70
Markdown (Informal)
[Actor Identification in Discourse: A Challenge for LLMs?](https://aclanthology.org/2024.codi-1.6) (Barić et al., CODI-WS 2024)
ACL