@inproceedings{richard-etal-2024-fracas,
title = "{FRACAS}: a {FR}ench Annotated Corpus of Attribution relations in new{S}",
author = "Richard, Ange and
Alonzo Canul, Laura Cristina and
Portet, Fran{\c{c}}ois",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.654",
pages = "7417--7428",
abstract = "Quotation extraction is a widely useful task both from a sociological and from a Natural Language Processing perspective. However, very little data is available to study this task in languages other than English. In this paper, we present FRACAS, a manually annotated corpus of 1,676 newswire texts in French for quotation extraction and source attribution. We first describe the composition of our corpus and the choices that were made in selecting the data. We then detail the annotation guidelines, the annotation process and give relevant statistics about our corpus. We give results for the inter-annotator agreement, which is substantially high for such a difficult linguistic phenomenon. We use this new resource to test the ability of a neural state-of-the-art relation extraction system to extract quotes and their source and we compare this model to the latest available system for quotation extraction for the French language, which is rule-based. Experiments using our dataset on the state-of-the-art system show very promising results considering the difficulty of the task at hand.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="richard-etal-2024-fracas">
<titleInfo>
<title>FRACAS: a FRench Annotated Corpus of Attribution relations in newS</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ange</namePart>
<namePart type="family">Richard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="given">Cristina</namePart>
<namePart type="family">Alonzo Canul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Portet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Quotation extraction is a widely useful task both from a sociological and from a Natural Language Processing perspective. However, very little data is available to study this task in languages other than English. In this paper, we present FRACAS, a manually annotated corpus of 1,676 newswire texts in French for quotation extraction and source attribution. We first describe the composition of our corpus and the choices that were made in selecting the data. We then detail the annotation guidelines, the annotation process and give relevant statistics about our corpus. We give results for the inter-annotator agreement, which is substantially high for such a difficult linguistic phenomenon. We use this new resource to test the ability of a neural state-of-the-art relation extraction system to extract quotes and their source and we compare this model to the latest available system for quotation extraction for the French language, which is rule-based. Experiments using our dataset on the state-of-the-art system show very promising results considering the difficulty of the task at hand.</abstract>
<identifier type="citekey">richard-etal-2024-fracas</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.654</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>7417</start>
<end>7428</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FRACAS: a FRench Annotated Corpus of Attribution relations in newS
%A Richard, Ange
%A Alonzo Canul, Laura Cristina
%A Portet, François
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F richard-etal-2024-fracas
%X Quotation extraction is a widely useful task both from a sociological and from a Natural Language Processing perspective. However, very little data is available to study this task in languages other than English. In this paper, we present FRACAS, a manually annotated corpus of 1,676 newswire texts in French for quotation extraction and source attribution. We first describe the composition of our corpus and the choices that were made in selecting the data. We then detail the annotation guidelines, the annotation process and give relevant statistics about our corpus. We give results for the inter-annotator agreement, which is substantially high for such a difficult linguistic phenomenon. We use this new resource to test the ability of a neural state-of-the-art relation extraction system to extract quotes and their source and we compare this model to the latest available system for quotation extraction for the French language, which is rule-based. Experiments using our dataset on the state-of-the-art system show very promising results considering the difficulty of the task at hand.
%U https://aclanthology.org/2024.lrec-main.654
%P 7417-7428
Markdown (Informal)
[FRACAS: a FRench Annotated Corpus of Attribution relations in newS](https://aclanthology.org/2024.lrec-main.654) (Richard et al., LREC-COLING 2024)
ACL