@inproceedings{huguet-cabot-etal-2023-red,
title = "{RED}$^{\textrm{FM}}$: a Filtered and Multilingual Relation Extraction Dataset",
author = "Huguet Cabot, Pere-Llu{\'\i}s and
Tedeschi, Simone and
Ngonga Ngomo, Axel-Cyrille and
Navigli, Roberto",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-long.237",
doi = "10.18653/v1/2023.acl-long.237",
pages = "4326--4343",
abstract = "Relation Extraction (RE) is a task that identifies relationships between entities in a text, enabling the acquisition of relational facts and bridging the gap between natural language and structured knowledge. However, current RE models often rely on small datasets with low coverage of relation types, particularly when working with languages other than English.In this paper, we address the above issue and provide two new resources that enable the training and evaluation of multilingual RE systems. First, we present SRED$^{\textrm{FM}}$, an automatically annotated dataset covering 18 languages, 400 relation types, 13 entity types, totaling more than 40 million triplet instances. Second, we propose RED$^{\textrm{FM}}$, a smaller, human-revised dataset for seven languages that allows for the evaluation of multilingual RE systems. To demonstrate the utility of these novel datasets, we experiment with the first end-to-end multilingual RE model, mREBEL, that extracts triplets, including entity types, in multiple languages. We release our resources and model checkpoints at [\url{https://www.github.com/babelscape/rebel}](\url{https://www.github.com/babelscape/rebel}).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huguet-cabot-etal-2023-red">
<titleInfo>
<title>RED^FM: a Filtered and Multilingual Relation Extraction Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pere-Lluís</namePart>
<namePart type="family">Huguet Cabot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Tedeschi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Axel-Cyrille</namePart>
<namePart type="family">Ngonga Ngomo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Relation Extraction (RE) is a task that identifies relationships between entities in a text, enabling the acquisition of relational facts and bridging the gap between natural language and structured knowledge. However, current RE models often rely on small datasets with low coverage of relation types, particularly when working with languages other than English.In this paper, we address the above issue and provide two new resources that enable the training and evaluation of multilingual RE systems. First, we present SRED^FM, an automatically annotated dataset covering 18 languages, 400 relation types, 13 entity types, totaling more than 40 million triplet instances. Second, we propose RED^FM, a smaller, human-revised dataset for seven languages that allows for the evaluation of multilingual RE systems. To demonstrate the utility of these novel datasets, we experiment with the first end-to-end multilingual RE model, mREBEL, that extracts triplets, including entity types, in multiple languages. We release our resources and model checkpoints at [https://www.github.com/babelscape/rebel](https://www.github.com/babelscape/rebel).</abstract>
<identifier type="citekey">huguet-cabot-etal-2023-red</identifier>
<identifier type="doi">10.18653/v1/2023.acl-long.237</identifier>
<location>
<url>https://aclanthology.org/2023.acl-long.237</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>4326</start>
<end>4343</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RED^FM: a Filtered and Multilingual Relation Extraction Dataset
%A Huguet Cabot, Pere-Lluís
%A Tedeschi, Simone
%A Ngonga Ngomo, Axel-Cyrille
%A Navigli, Roberto
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F huguet-cabot-etal-2023-red
%X Relation Extraction (RE) is a task that identifies relationships between entities in a text, enabling the acquisition of relational facts and bridging the gap between natural language and structured knowledge. However, current RE models often rely on small datasets with low coverage of relation types, particularly when working with languages other than English.In this paper, we address the above issue and provide two new resources that enable the training and evaluation of multilingual RE systems. First, we present SRED^FM, an automatically annotated dataset covering 18 languages, 400 relation types, 13 entity types, totaling more than 40 million triplet instances. Second, we propose RED^FM, a smaller, human-revised dataset for seven languages that allows for the evaluation of multilingual RE systems. To demonstrate the utility of these novel datasets, we experiment with the first end-to-end multilingual RE model, mREBEL, that extracts triplets, including entity types, in multiple languages. We release our resources and model checkpoints at [https://www.github.com/babelscape/rebel](https://www.github.com/babelscape/rebel).
%R 10.18653/v1/2023.acl-long.237
%U https://aclanthology.org/2023.acl-long.237
%U https://doi.org/10.18653/v1/2023.acl-long.237
%P 4326-4343
Markdown (Informal)
[REDFM: a Filtered and Multilingual Relation Extraction Dataset](https://aclanthology.org/2023.acl-long.237) (Huguet Cabot et al., ACL 2023)
ACL
- Pere-Lluís Huguet Cabot, Simone Tedeschi, Axel-Cyrille Ngonga Ngomo, and Roberto Navigli. 2023. REDFM: a Filtered and Multilingual Relation Extraction Dataset. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 4326–4343, Toronto, Canada. Association for Computational Linguistics.