@inproceedings{blair-bar-2024-jrc-names,
title = "{JRC}-Names-Retrieval: A Standardized Benchmark for Name Search",
author = "Blair, Philip and
Bar, Kfir",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.838",
pages = "9589--9603",
abstract = "Many systems rely on the ability to effectively search through databases of personal and organization entity names in multiple writing scripts. Despite this, there is a relative lack of research studying this problem in isolation. In this work, we discuss this problem in detail and support future research by publishing what we believe is the first comprehensive dataset designed for this task. Additionally, we present a number of baselines against which future work can be compared; among which, we describe a neural solution based on ByT5 (Xue et al. 2022) which demonstrates up to a 12{\%} performance gain over preexisting baselines, indicating that there remains much room for improvement in this space.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blair-bar-2024-jrc-names">
<titleInfo>
<title>JRC-Names-Retrieval: A Standardized Benchmark for Name Search</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philip</namePart>
<namePart type="family">Blair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kfir</namePart>
<namePart type="family">Bar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many systems rely on the ability to effectively search through databases of personal and organization entity names in multiple writing scripts. Despite this, there is a relative lack of research studying this problem in isolation. In this work, we discuss this problem in detail and support future research by publishing what we believe is the first comprehensive dataset designed for this task. Additionally, we present a number of baselines against which future work can be compared; among which, we describe a neural solution based on ByT5 (Xue et al. 2022) which demonstrates up to a 12% performance gain over preexisting baselines, indicating that there remains much room for improvement in this space.</abstract>
<identifier type="citekey">blair-bar-2024-jrc-names</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.838</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>9589</start>
<end>9603</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JRC-Names-Retrieval: A Standardized Benchmark for Name Search
%A Blair, Philip
%A Bar, Kfir
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F blair-bar-2024-jrc-names
%X Many systems rely on the ability to effectively search through databases of personal and organization entity names in multiple writing scripts. Despite this, there is a relative lack of research studying this problem in isolation. In this work, we discuss this problem in detail and support future research by publishing what we believe is the first comprehensive dataset designed for this task. Additionally, we present a number of baselines against which future work can be compared; among which, we describe a neural solution based on ByT5 (Xue et al. 2022) which demonstrates up to a 12% performance gain over preexisting baselines, indicating that there remains much room for improvement in this space.
%U https://aclanthology.org/2024.lrec-main.838
%P 9589-9603
Markdown (Informal)
[JRC-Names-Retrieval: A Standardized Benchmark for Name Search](https://aclanthology.org/2024.lrec-main.838) (Blair & Bar, LREC-COLING 2024)
ACL