% Conference paper record (workshop proceedings entry).
% NOTE(review): "enhence" in the title appears to be an intentional play on the
% Swedish pronoun "hen" discussed in the abstract -- do not "correct" it to "enhance".
@inproceedings{bjorklund-devinney-2023-computer,
    title = {Computer, enhence: {POS}-tagging improvements for nonbinary pronoun use in {Swedish}},
    author = {Bj{\"o}rklund, Henrik and
      Devinney, Hannah},
    editor = {Chakravarthi, Bharathi R. and
      Bharathi, B. and
      Griffith, Josephine and
      Bali, Kalika and
      Buitelaar, Paul},
    booktitle = {Proceedings of the Third Workshop on Language Technology for Equality, Diversity and Inclusion},
    month = sep,
    year = {2023},
    address = {Varna, Bulgaria},
    publisher = {INCOMA Ltd., Shoumen, Bulgaria},
    url = {https://aclanthology.org/2023.ltedi-1.8/},
    pages = {54--61},
    abstract = {Part of Speech (POS) taggers for Swedish routinely fail for the third person gender-neutral pronoun {\textquotedblleft}hen{\textquotedblright}, despite the fact that it has been a well-established part of the Swedish language since at least 2014. In addition to simply being a form of gender bias, this failure can have negative effects on other tasks relying on POS information. We demonstrate the usefulness of semi-synthetic augmented datasets in a case study, retraining a POS tagger to correctly recognize {\textquotedblleft}hen{\textquotedblright} as a personal pronoun. We evaluate our retrained models for both tag accuracy and on a downstream task (dependency parsing) in a classical NLP pipeline. Our results show that adding such data works to correct for the disparity in performance. The accuracy rate for identifying {\textquotedblleft}hen{\textquotedblright} as a pronoun can be brought up to acceptable levels with only minor adjustments to the tagger's vocabulary files. Performance parity to gendered pronouns can be reached after retraining with only a few hundred examples. This increase in POS tag accuracy also results in improvements for dependency parsing sentences containing hen.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bjorklund-devinney-2023-computer">
<titleInfo>
<title>Computer, enhence: POS-tagging improvements for nonbinary pronoun use in Swedish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Henrik</namePart>
<namePart type="family">Björklund</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannah</namePart>
<namePart type="family">Devinney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Language Technology for Equality, Diversity and Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">B</namePart>
<namePart type="family">Bharathi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josephine</namePart>
<namePart type="family">Griffith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Part of Speech (POS) taggers for Swedish routinely fail for the third person gender-neutral pronoun “hen”, despite the fact that it has been a well-established part of the Swedish language since at least 2014. In addition to simply being a form of gender bias, this failure can have negative effects on other tasks relying on POS information. We demonstrate the usefulness of semi-synthetic augmented datasets in a case study, retraining a POS tagger to correctly recognize “hen” as a personal pronoun. We evaluate our retrained models for both tag accuracy and on a downstream task (dependency parsing) in a classical NLP pipeline. Our results show that adding such data works to correct for the disparity in performance. The accuracy rate for identifying “hen” as a pronoun can be brought up to acceptable levels with only minor adjustments to the tagger’s vocabulary files. Performance parity to gendered pronouns can be reached after retraining with only a few hundred examples. This increase in POS tag accuracy also results in improvements for dependency parsing sentences containing hen.</abstract>
<identifier type="citekey">bjorklund-devinney-2023-computer</identifier>
<location>
<url>https://aclanthology.org/2023.ltedi-1.8/</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>54</start>
<end>61</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Computer, enhence: POS-tagging improvements for nonbinary pronoun use in Swedish
%A Björklund, Henrik
%A Devinney, Hannah
%Y Chakravarthi, Bharathi R.
%Y Bharathi, B.
%Y Griffith, Josephine
%Y Bali, Kalika
%Y Buitelaar, Paul
%S Proceedings of the Third Workshop on Language Technology for Equality, Diversity and Inclusion
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F bjorklund-devinney-2023-computer
%X Part of Speech (POS) taggers for Swedish routinely fail for the third person gender-neutral pronoun “hen”, despite the fact that it has been a well-established part of the Swedish language since at least 2014. In addition to simply being a form of gender bias, this failure can have negative effects on other tasks relying on POS information. We demonstrate the usefulness of semi-synthetic augmented datasets in a case study, retraining a POS tagger to correctly recognize “hen” as a personal pronoun. We evaluate our retrained models for both tag accuracy and on a downstream task (dependency parsing) in a classical NLP pipeline. Our results show that adding such data works to correct for the disparity in performance. The accuracy rate for identifying “hen” as a pronoun can be brought up to acceptable levels with only minor adjustments to the tagger’s vocabulary files. Performance parity to gendered pronouns can be reached after retraining with only a few hundred examples. This increase in POS tag accuracy also results in improvements for dependency parsing sentences containing hen.
%U https://aclanthology.org/2023.ltedi-1.8/
%P 54-61
Markdown (Informal)
[Computer, enhence: POS-tagging improvements for nonbinary pronoun use in Swedish](https://aclanthology.org/2023.ltedi-1.8/) (Björklund & Devinney, LTEDI 2023)
ACL