@inproceedings{chen-etal-2022-crossroads,
title = "Crossroads, Buildings and Neighborhoods: A Dataset for Fine-grained Location Recognition",
author = "Chen, Pei and
Xu, Haotian and
Zhang, Cheng and
Huang, Ruihong",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.243/",
doi = "10.18653/v1/2022.naacl-main.243",
pages = "3329--3339",
abstract = "General domain Named Entity Recognition (NER) datasets like CoNLL-2003 mostly annotate coarse-grained location entities such as a country or a city. But many applications require identifying fine-grained locations from texts and mapping them precisely to geographic sites, e.g., a crossroad, an apartment building, or a grocery store. In this paper, we introduce a new dataset HarveyNER with fine-grained locations annotated in tweets. This dataset presents unique challenges and characterizes many complex and long location mentions in informal descriptions. We built strong baseline models using Curriculum Learning and experimented with different heuristic curricula to better recognize difficult location mentions. Experimental results show that the simple curricula can improve the system`s performance on hard cases and its overall performance, and outperform several other baseline systems. The dataset and the baseline models can be found at \url{https://github.com/brickee/HarveyNER}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2022-crossroads">
<titleInfo>
<title>Crossroads, Buildings and Neighborhoods: A Dataset for Fine-grained Location Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pei</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haotian</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>General domain Named Entity Recognition (NER) datasets like CoNLL-2003 mostly annotate coarse-grained location entities such as a country or a city. But many applications require identifying fine-grained locations from texts and mapping them precisely to geographic sites, e.g., a crossroad, an apartment building, or a grocery store. In this paper, we introduce a new dataset HarveyNER with fine-grained locations annotated in tweets. This dataset presents unique challenges and characterizes many complex and long location mentions in informal descriptions. We built strong baseline models using Curriculum Learning and experimented with different heuristic curricula to better recognize difficult location mentions. Experimental results show that the simple curricula can improve the system‘s performance on hard cases and its overall performance, and outperform several other baseline systems. The dataset and the baseline models can be found at https://github.com/brickee/HarveyNER.</abstract>
<identifier type="citekey">chen-etal-2022-crossroads</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.243</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.243/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>3329</start>
<end>3339</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Crossroads, Buildings and Neighborhoods: A Dataset for Fine-grained Location Recognition
%A Chen, Pei
%A Xu, Haotian
%A Zhang, Cheng
%A Huang, Ruihong
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F chen-etal-2022-crossroads
%X General domain Named Entity Recognition (NER) datasets like CoNLL-2003 mostly annotate coarse-grained location entities such as a country or a city. But many applications require identifying fine-grained locations from texts and mapping them precisely to geographic sites, e.g., a crossroad, an apartment building, or a grocery store. In this paper, we introduce a new dataset HarveyNER with fine-grained locations annotated in tweets. This dataset presents unique challenges and characterizes many complex and long location mentions in informal descriptions. We built strong baseline models using Curriculum Learning and experimented with different heuristic curricula to better recognize difficult location mentions. Experimental results show that the simple curricula can improve the system‘s performance on hard cases and its overall performance, and outperform several other baseline systems. The dataset and the baseline models can be found at https://github.com/brickee/HarveyNER.
%R 10.18653/v1/2022.naacl-main.243
%U https://aclanthology.org/2022.naacl-main.243/
%U https://doi.org/10.18653/v1/2022.naacl-main.243
%P 3329-3339
Markdown (Informal)
[Crossroads, Buildings and Neighborhoods: A Dataset for Fine-grained Location Recognition](https://aclanthology.org/2022.naacl-main.243/) (Chen et al., NAACL 2022)
ACL