@inproceedings{li-etal-2024-ted,
title = "{TED}-{EL}: A Corpus for Speech Entity Linking",
author = "Li, Silin and
Song, Ruoyu and
Lan, Tianwei and
Liu, Zeming and
Guo, Yuhang",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1365",
pages = "15721--15731",
abstract = "Speech entity linking amis to recognize mentions from speech and link them to entities in knowledge bases. Previous work on entity linking mainly focuses on visual context and text context. In contrast, speech entity linking focuses on audio context. In this paper, we first propose the speech entity linking task. To facilitate the study of this task, we propose the first speech entity linking dataset, TED-EL. Our corpus is a high-quality, human-annotated, audio, text, and mention-entity pair parallel dataset derived from Technology, Entertainment, Design (TED) talks and includes a wide range of entity types (24 types). Based on TED-EL, we designed two types of models: ranking-based and generative speech entity linking models. We conducted experiments on the TED-EL dataset for both types of models. The results show that the ranking-based models outperform the generative models, achieving an F1 score of 60.68{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2024-ted">
<titleInfo>
<title>TED-EL: A Corpus for Speech Entity Linking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Silin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruoyu</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianwei</namePart>
<namePart type="family">Lan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeming</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhang</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Speech entity linking amis to recognize mentions from speech and link them to entities in knowledge bases. Previous work on entity linking mainly focuses on visual context and text context. In contrast, speech entity linking focuses on audio context. In this paper, we first propose the speech entity linking task. To facilitate the study of this task, we propose the first speech entity linking dataset, TED-EL. Our corpus is a high-quality, human-annotated, audio, text, and mention-entity pair parallel dataset derived from Technology, Entertainment, Design (TED) talks and includes a wide range of entity types (24 types). Based on TED-EL, we designed two types of models: ranking-based and generative speech entity linking models. We conducted experiments on the TED-EL dataset for both types of models. The results show that the ranking-based models outperform the generative models, achieving an F1 score of 60.68%.</abstract>
<identifier type="citekey">li-etal-2024-ted</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1365</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>15721</start>
<end>15731</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TED-EL: A Corpus for Speech Entity Linking
%A Li, Silin
%A Song, Ruoyu
%A Lan, Tianwei
%A Liu, Zeming
%A Guo, Yuhang
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F li-etal-2024-ted
%X Speech entity linking amis to recognize mentions from speech and link them to entities in knowledge bases. Previous work on entity linking mainly focuses on visual context and text context. In contrast, speech entity linking focuses on audio context. In this paper, we first propose the speech entity linking task. To facilitate the study of this task, we propose the first speech entity linking dataset, TED-EL. Our corpus is a high-quality, human-annotated, audio, text, and mention-entity pair parallel dataset derived from Technology, Entertainment, Design (TED) talks and includes a wide range of entity types (24 types). Based on TED-EL, we designed two types of models: ranking-based and generative speech entity linking models. We conducted experiments on the TED-EL dataset for both types of models. The results show that the ranking-based models outperform the generative models, achieving an F1 score of 60.68%.
%U https://aclanthology.org/2024.lrec-main.1365
%P 15721-15731
Markdown (Informal)
[TED-EL: A Corpus for Speech Entity Linking](https://aclanthology.org/2024.lrec-main.1365) (Li et al., LREC-COLING 2024)
ACL
- Silin Li, Ruoyu Song, Tianwei Lan, Zeming Liu, and Yuhang Guo. 2024. TED-EL: A Corpus for Speech Entity Linking. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 15721–15731, Torino, Italia. ELRA and ICCL.