@inproceedings{shridhar-etal-2023-longtonotes,
title = "Longtonotes: {O}nto{N}otes with Longer Coreference Chains",
author = "Shridhar, Kumar and
Monath, Nicholas and
Thirukovalluru, Raghuveer and
Stolfo, Alessandro and
Zaheer, Manzil and
McCallum, Andrew and
Sachan, Mrinmaya",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.105",
doi = "10.18653/v1/2023.findings-eacl.105",
pages = "1428--1442",
abstract = "Ontonotes has served as the most important benchmark for coreference resolution. However, for ease of annotation, several long documents in Ontonotes were split into smaller parts. In this work, we build a corpus of coreference-annotated documents of significantly longer length than what is currently available. We do so by providing an accurate, manually-curated, merging of annotations from documents that were split into multiple parts in the original Ontonotes annotation process. The resulting corpus, which we call LongtoNotes, contains documents in multiple genres of the English language with varying lengths, the longest of which are up to 8x the length of documents in Ontonotes, and 2x those in Litbank. We evaluate state-of-the-art neural coreference systems on this new corpus, analyze the relationships between model architectures/hyperparameters and document length on performance and efficiency of the models, and demonstrate areas of improvement in long-document coreference modelling revealed by our new corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shridhar-etal-2023-longtonotes">
<titleInfo>
<title>Longtonotes: OntoNotes with Longer Coreference Chains</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kumar</namePart>
<namePart type="family">Shridhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Monath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raghuveer</namePart>
<namePart type="family">Thirukovalluru</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Stolfo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manzil</namePart>
<namePart type="family">Zaheer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">McCallum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mrinmaya</namePart>
<namePart type="family">Sachan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Ontonotes has served as the most important benchmark for coreference resolution. However, for ease of annotation, several long documents in Ontonotes were split into smaller parts. In this work, we build a corpus of coreference-annotated documents of significantly longer length than what is currently available. We do so by providing an accurate, manually-curated, merging of annotations from documents that were split into multiple parts in the original Ontonotes annotation process. The resulting corpus, which we call LongtoNotes, contains documents in multiple genres of the English language with varying lengths, the longest of which are up to 8x the length of documents in Ontonotes, and 2x those in Litbank. We evaluate state-of-the-art neural coreference systems on this new corpus, analyze the relationships between model architectures/hyperparameters and document length on performance and efficiency of the models, and demonstrate areas of improvement in long-document coreference modelling revealed by our new corpus.</abstract>
<identifier type="citekey">shridhar-etal-2023-longtonotes</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.105</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.105</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>1428</start>
<end>1442</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Longtonotes: OntoNotes with Longer Coreference Chains
%A Shridhar, Kumar
%A Monath, Nicholas
%A Thirukovalluru, Raghuveer
%A Stolfo, Alessandro
%A Zaheer, Manzil
%A McCallum, Andrew
%A Sachan, Mrinmaya
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F shridhar-etal-2023-longtonotes
%X Ontonotes has served as the most important benchmark for coreference resolution. However, for ease of annotation, several long documents in Ontonotes were split into smaller parts. In this work, we build a corpus of coreference-annotated documents of significantly longer length than what is currently available. We do so by providing an accurate, manually-curated, merging of annotations from documents that were split into multiple parts in the original Ontonotes annotation process. The resulting corpus, which we call LongtoNotes, contains documents in multiple genres of the English language with varying lengths, the longest of which are up to 8x the length of documents in Ontonotes, and 2x those in Litbank. We evaluate state-of-the-art neural coreference systems on this new corpus, analyze the relationships between model architectures/hyperparameters and document length on performance and efficiency of the models, and demonstrate areas of improvement in long-document coreference modelling revealed by our new corpus.
%R 10.18653/v1/2023.findings-eacl.105
%U https://aclanthology.org/2023.findings-eacl.105
%U https://doi.org/10.18653/v1/2023.findings-eacl.105
%P 1428-1442
Markdown (Informal)
[Longtonotes: OntoNotes with Longer Coreference Chains](https://aclanthology.org/2023.findings-eacl.105) (Shridhar et al., Findings 2023)
ACL
- Kumar Shridhar, Nicholas Monath, Raghuveer Thirukovalluru, Alessandro Stolfo, Manzil Zaheer, Andrew McCallum, and Mrinmaya Sachan. 2023. Longtonotes: OntoNotes with Longer Coreference Chains. In Findings of the Association for Computational Linguistics: EACL 2023, pages 1428–1442, Dubrovnik, Croatia. Association for Computational Linguistics.