@inproceedings{nabizadeh-etal-2020-myfixit,
title = "{M}y{F}ixit: An Annotated Dataset, Annotation Tool, and Baseline Methods for Information Extraction from Repair Manuals",
author = "Nabizadeh, Nima and
Kolossa, Dorothea and
Heckmann, Martin",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.260",
pages = "2120--2128",
abstract = "Text instructions are among the most widely used media for learning and teaching. Hence, to create assistance systems that are capable of supporting humans autonomously in new tasks, it would be immensely productive, if machines were enabled to extract task knowledge from such text instructions. In this paper, we, therefore, focus on information extraction (IE) from the instructional text in repair manuals. This brings with it the multiple challenges of information extraction from the situated and technical language in relatively long and often complex instructions. To tackle these challenges, we introduce a semi-structured dataset of repair manuals. The dataset is annotated in a large category of devices, with information that we consider most valuable for an automated repair assistant, including the required tools and the disassembled parts at each step of the repair progress. We then propose methods that can serve as baselines for this IE task: an unsupervised method based on a bags-of-n-grams similarity for extracting the needed tools in each repair step, and a deep-learning-based sequence labeling model for extracting the identity of disassembled parts. These baseline methods are integrated into a semi-automatic web-based annotator application that is also available along with the dataset.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nabizadeh-etal-2020-myfixit">
<titleInfo>
<title>MyFixit: An Annotated Dataset, Annotation Tool, and Baseline Methods for Information Extraction from Repair Manuals</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nima</namePart>
<namePart type="family">Nabizadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dorothea</namePart>
<namePart type="family">Kolossa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Heckmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Text instructions are among the most widely used media for learning and teaching. Hence, to create assistance systems that are capable of supporting humans autonomously in new tasks, it would be immensely productive, if machines were enabled to extract task knowledge from such text instructions. In this paper, we, therefore, focus on information extraction (IE) from the instructional text in repair manuals. This brings with it the multiple challenges of information extraction from the situated and technical language in relatively long and often complex instructions. To tackle these challenges, we introduce a semi-structured dataset of repair manuals. The dataset is annotated in a large category of devices, with information that we consider most valuable for an automated repair assistant, including the required tools and the disassembled parts at each step of the repair progress. We then propose methods that can serve as baselines for this IE task: an unsupervised method based on a bags-of-n-grams similarity for extracting the needed tools in each repair step, and a deep-learning-based sequence labeling model for extracting the identity of disassembled parts. These baseline methods are integrated into a semi-automatic web-based annotator application that is also available along with the dataset.</abstract>
<identifier type="citekey">nabizadeh-etal-2020-myfixit</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.260</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>2120</start>
<end>2128</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MyFixit: An Annotated Dataset, Annotation Tool, and Baseline Methods for Information Extraction from Repair Manuals
%A Nabizadeh, Nima
%A Kolossa, Dorothea
%A Heckmann, Martin
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F nabizadeh-etal-2020-myfixit
%X Text instructions are among the most widely used media for learning and teaching. Hence, to create assistance systems that are capable of supporting humans autonomously in new tasks, it would be immensely productive, if machines were enabled to extract task knowledge from such text instructions. In this paper, we, therefore, focus on information extraction (IE) from the instructional text in repair manuals. This brings with it the multiple challenges of information extraction from the situated and technical language in relatively long and often complex instructions. To tackle these challenges, we introduce a semi-structured dataset of repair manuals. The dataset is annotated in a large category of devices, with information that we consider most valuable for an automated repair assistant, including the required tools and the disassembled parts at each step of the repair progress. We then propose methods that can serve as baselines for this IE task: an unsupervised method based on a bags-of-n-grams similarity for extracting the needed tools in each repair step, and a deep-learning-based sequence labeling model for extracting the identity of disassembled parts. These baseline methods are integrated into a semi-automatic web-based annotator application that is also available along with the dataset.
%U https://aclanthology.org/2020.lrec-1.260
%P 2120-2128
Markdown (Informal)
[MyFixit: An Annotated Dataset, Annotation Tool, and Baseline Methods for Information Extraction from Repair Manuals](https://aclanthology.org/2020.lrec-1.260) (Nabizadeh et al., LREC 2020)
ACL