@inproceedings{neumann-etal-2021-pawls,
title = "{PAWLS}: {PDF} Annotation With Labels and Structure",
author = "Neumann, Mark and
Shen, Zejiang and
Skjonsberg, Sam",
editor = "Ji, Heng and
Park, Jong C. and
Xia, Rui",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-demo.31",
doi = "10.18653/v1/2021.acl-demo.31",
pages = "258--264",
abstract = "Adobe{'}s Portable Document Format (PDF) is a popular way of distributing view-only documents with a rich visual markup. This presents a challenge to NLP practitioners who wish to use the information contained within PDF documents for training models or data analysis, because annotating these documents is difficult. In this paper, we present PDF Annotation with Labels and Structure (PAWLS), a new annotation tool designed specifically for the PDF document format. PAWLS is particularly suited for mixed-mode annotation and scenarios in which annotators require extended context to annotate accurately. PAWLS supports span-based textual annotation, N-ary relations and freeform, non-textual bounding boxes, all of which can be exported in convenient formats for training multi-modal machine learning models. A read-only PAWLS server is available at \url{https://pawls.apps.allenai.org/}, and the source code is available at \url{https://github.com/allenai/pawls}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="neumann-etal-2021-pawls">
<titleInfo>
<title>PAWLS: PDF Annotation With Labels and Structure</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Neumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zejiang</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sam</namePart>
<namePart type="family">Skjonsberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jong</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Adobe’s Portable Document Format (PDF) is a popular way of distributing view-only documents with a rich visual markup. This presents a challenge to NLP practitioners who wish to use the information contained within PDF documents for training models or data analysis, because annotating these documents is difficult. In this paper, we present PDF Annotation with Labels and Structure (PAWLS), a new annotation tool designed specifically for the PDF document format. PAWLS is particularly suited for mixed-mode annotation and scenarios in which annotators require extended context to annotate accurately. PAWLS supports span-based textual annotation, N-ary relations and freeform, non-textual bounding boxes, all of which can be exported in convenient formats for training multi-modal machine learning models. A read-only PAWLS server is available at https://pawls.apps.allenai.org/, and the source code is available at https://github.com/allenai/pawls.</abstract>
<identifier type="citekey">neumann-etal-2021-pawls</identifier>
<identifier type="doi">10.18653/v1/2021.acl-demo.31</identifier>
<location>
<url>https://aclanthology.org/2021.acl-demo.31</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>258</start>
<end>264</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PAWLS: PDF Annotation With Labels and Structure
%A Neumann, Mark
%A Shen, Zejiang
%A Skjonsberg, Sam
%Y Ji, Heng
%Y Park, Jong C.
%Y Xia, Rui
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F neumann-etal-2021-pawls
%X Adobe’s Portable Document Format (PDF) is a popular way of distributing view-only documents with a rich visual markup. This presents a challenge to NLP practitioners who wish to use the information contained within PDF documents for training models or data analysis, because annotating these documents is difficult. In this paper, we present PDF Annotation with Labels and Structure (PAWLS), a new annotation tool designed specifically for the PDF document format. PAWLS is particularly suited for mixed-mode annotation and scenarios in which annotators require extended context to annotate accurately. PAWLS supports span-based textual annotation, N-ary relations and freeform, non-textual bounding boxes, all of which can be exported in convenient formats for training multi-modal machine learning models. A read-only PAWLS server is available at https://pawls.apps.allenai.org/, and the source code is available at https://github.com/allenai/pawls.
%R 10.18653/v1/2021.acl-demo.31
%U https://aclanthology.org/2021.acl-demo.31
%U https://doi.org/10.18653/v1/2021.acl-demo.31
%P 258-264
Markdown (Informal)
[PAWLS: PDF Annotation With Labels and Structure](https://aclanthology.org/2021.acl-demo.31) (Neumann et al., ACL-IJCNLP 2021)
ACL
- Mark Neumann, Zejiang Shen, and Sam Skjonsberg. 2021. PAWLS: PDF Annotation With Labels and Structure. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations, pages 258–264, Online. Association for Computational Linguistics.