@inproceedings{zhang-etal-2022-shallow,
title = "Shallow Parsing for {N}epal {B}hasa Complement Clauses",
author = "Zhang, Borui and
Kazemzadeh, Abe and
Reese, Brian",
editor = "Moeller, Sarah and
Anastasopoulos, Antonios and
Arppe, Antti and
Chaudhary, Aditi and
Harrigan, Atticus and
Holden, Josh and
Lachler, Jordan and
Palmer, Alexis and
Rijhwani, Shruti and
Schwartz, Lane",
booktitle = "Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.computel-1.8/",
doi = "10.18653/v1/2022.computel-1.8",
pages = "61--67",
abstract = "Accelerating the process of data collection, annotation, and analysis is an urgent need for linguistic fieldwork and documentation of endangered languages (Bird, 2009). Our experiments describe how we maximize the quality for the Nepal Bhasa syntactic complement structure chunking model. Native speaker language consultants were trained to annotate a minimally selected raw data set (Su{\'a}rez et al., 2019). The embedded clauses, matrix verbs, and embedded verbs are annotated. We apply both statistical training algorithms and transfer learning in our training, including Naive Bayes, MaxEnt, and fine-tuning the pre-trained mBERT model (Devlin et al., 2018). We show that with limited annotated data, the model is already sufficient for the task. The modeling resources we used are largely available for many other endangered languages. The practice is easy to duplicate for training a shallow parser for other endangered languages in general."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2022-shallow">
<titleInfo>
<title>Shallow Parsing for Nepal Bhasa Complement Clauses</title>
</titleInfo>
<name type="personal">
<namePart type="given">Borui</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abe</namePart>
<namePart type="family">Kazemzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Reese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Moeller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antti</namePart>
<namePart type="family">Arppe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditi</namePart>
<namePart type="family">Chaudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atticus</namePart>
<namePart type="family">Harrigan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josh</namePart>
<namePart type="family">Holden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Lachler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lane</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Accelerating the process of data collection, annotation, and analysis is an urgent need for linguistic fieldwork and documentation of endangered languages (Bird, 2009). Our experiments describe how we maximize the quality for the Nepal Bhasa syntactic complement structure chunking model. Native speaker language consultants were trained to annotate a minimally selected raw data set (Suárez et al., 2019). The embedded clauses, matrix verbs, and embedded verbs are annotated. We apply both statistical training algorithms and transfer learning in our training, including Naive Bayes, MaxEnt, and fine-tuning the pre-trained mBERT model (Devlin et al., 2018). We show that with limited annotated data, the model is already sufficient for the task. The modeling resources we used are largely available for many other endangered languages. The practice is easy to duplicate for training a shallow parser for other endangered languages in general.</abstract>
<identifier type="citekey">zhang-etal-2022-shallow</identifier>
<identifier type="doi">10.18653/v1/2022.computel-1.8</identifier>
<location>
<url>https://aclanthology.org/2022.computel-1.8/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>61</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Shallow Parsing for Nepal Bhasa Complement Clauses
%A Zhang, Borui
%A Kazemzadeh, Abe
%A Reese, Brian
%Y Moeller, Sarah
%Y Anastasopoulos, Antonios
%Y Arppe, Antti
%Y Chaudhary, Aditi
%Y Harrigan, Atticus
%Y Holden, Josh
%Y Lachler, Jordan
%Y Palmer, Alexis
%Y Rijhwani, Shruti
%Y Schwartz, Lane
%S Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F zhang-etal-2022-shallow
%X Accelerating the process of data collection, annotation, and analysis is an urgent need for linguistic fieldwork and documentation of endangered languages (Bird, 2009). Our experiments describe how we maximize the quality for the Nepal Bhasa syntactic complement structure chunking model. Native speaker language consultants were trained to annotate a minimally selected raw data set (Suárez et al., 2019). The embedded clauses, matrix verbs, and embedded verbs are annotated. We apply both statistical training algorithms and transfer learning in our training, including Naive Bayes, MaxEnt, and fine-tuning the pre-trained mBERT model (Devlin et al., 2018). We show that with limited annotated data, the model is already sufficient for the task. The modeling resources we used are largely available for many other endangered languages. The practice is easy to duplicate for training a shallow parser for other endangered languages in general.
%R 10.18653/v1/2022.computel-1.8
%U https://aclanthology.org/2022.computel-1.8/
%U https://doi.org/10.18653/v1/2022.computel-1.8
%P 61-67
Markdown (Informal)
[Shallow Parsing for Nepal Bhasa Complement Clauses](https://aclanthology.org/2022.computel-1.8/) (Zhang et al., ComputEL 2022)
ACL
- Borui Zhang, Abe Kazemzadeh, and Brian Reese. 2022. Shallow Parsing for Nepal Bhasa Complement Clauses. In Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages, pages 61–67, Dublin, Ireland. Association for Computational Linguistics.