% Workshop paper record (export from aclanthology.org, see the url field).
% The acronym in booktitle is brace-protected so that bibliography styles
% which apply sentence casing keep it upper-case. All values use brace
% delimiters; month uses the predefined three-letter macro.
@inproceedings{anderson-2024-exploring-language,
  title     = {Exploring Language Representation through a Resource Inventory Project},
  author    = {Anderson, Carolyn},
  editor    = {Al-azzawi, Sana and
               Biester, Laura and
               Kov{\'a}cs, Gy{\"o}rgy and
               Marasovi{\'c}, Ana and
               Mathur, Leena and
               Mieskes, Margot and
               Weissweiler, Leonie},
  booktitle = {Proceedings of the Sixth Workshop on Teaching {NLP}},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.teachingnlp-1.14},
  pages     = {91--93},
  abstract  = {The increasing scale of large language models has led some students to wonder what contributions can be made in academia. However, students are often unaware that LLM-based approaches are not feasible for the majority of the world{'}s languages due to lack of data availability. This paper presents a research project in which students explore the issue of language representation by creating an inventory of the data, preprocessing, and model resources available for a less-resourced language. Students are put into small groups and assigned a language to research. Within the group, students take on one of three roles: dataset investigator, preprocessing investigator, or downstream task investigator. Students then work together to create a 7-page research report about their language.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for a single workshop paper. The top-level name element is
     the paper's author; the host relatedItem describes the proceedings volume
     (its title, editors, publisher and place). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="anderson-2024-exploring-language">
    <titleInfo>
      <title>Exploring Language Representation through a Resource Inventory Project</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Carolyn</namePart>
      <namePart type="family">Anderson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on Teaching NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sana</namePart>
        <namePart type="family">Al-azzawi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Laura</namePart>
        <namePart type="family">Biester</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">György</namePart>
        <namePart type="family">Kovács</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ana</namePart>
        <namePart type="family">Marasović</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leena</namePart>
        <namePart type="family">Mathur</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Margot</namePart>
        <namePart type="family">Mieskes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leonie</namePart>
        <namePart type="family">Weissweiler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The increasing scale of large language models has led some students to wonder what contributions can be made in academia. However, students are often unaware that LLM-based approaches are not feasible for the majority of the world’s languages due to lack of data availability. This paper presents a research project in which students explore the issue of language representation by creating an inventory of the data, preprocessing, and model resources available for a less-resourced language. Students are put into small groups and assigned a language to research. Within the group, students take on one of three roles: dataset investigator, preprocessing investigator, or downstream task investigator. Students then work together to create a 7-page research report about their language.</abstract>
    <identifier type="citekey">anderson-2024-exploring-language</identifier>
    <location>
      <url>https://aclanthology.org/2024.teachingnlp-1.14</url>
    </location>
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>91</start>
        <end>93</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Language Representation through a Resource Inventory Project
%A Anderson, Carolyn
%Y Al-azzawi, Sana
%Y Biester, Laura
%Y Kovács, György
%Y Marasović, Ana
%Y Mathur, Leena
%Y Mieskes, Margot
%Y Weissweiler, Leonie
%S Proceedings of the Sixth Workshop on Teaching NLP
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F anderson-2024-exploring-language
%X The increasing scale of large language models has led some students to wonder what contributions can be made in academia. However, students are often unaware that LLM-based approaches are not feasible for the majority of the world’s languages due to lack of data availability. This paper presents a research project in which students explore the issue of language representation by creating an inventory of the data, preprocessing, and model resources available for a less-resourced language. Students are put into small groups and assigned a language to research. Within the group, students take on one of three roles: dataset investigator, preprocessing investigator, or downstream task investigator. Students then work together to create a 7-page research report about their language.
%U https://aclanthology.org/2024.teachingnlp-1.14
%P 91-93
Markdown (Informal)
[Exploring Language Representation through a Resource Inventory Project](https://aclanthology.org/2024.teachingnlp-1.14) (Anderson, TeachingNLP-WS 2024)
ACL