@inproceedings{bunzeck-zarriess-2023-entrenchment,
title = "Entrenchment Matters: Investigating Positional and Constructional Sensitivity in Small and Large Language Models",
author = "Bunzeck, Bastian and
Zarrie{\ss}, Sina",
editor = "Breitholtz, Ellen and
Lappin, Shalom and
Loaiciga, Sharid and
Ilinykh, Nikolai and
Dobnik, Simon",
booktitle = "Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD)",
month = sep,
year = "2023",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.clasp-1.3/",
pages = "25--37",
abstract = "The success of large language models (LMs) has also prompted a push towards smaller models, but the differences in functionality and encodings between these two types of models are not yet well understood. In this paper, we employ a perturbed masking approach to investigate differences in token influence patterns on the sequence embeddings of larger and smaller RoBERTa models. Specifically, we explore how token properties like position, length or part of speech influence their sequence embeddings. We find that there is a general tendency for sequence-final tokens to exert a higher influence. Among part-of-speech tags, nouns, numerals and punctuation marks are the most influential, with smaller deviations for individual models. These findings also align with usage-based linguistic evidence on the effect of entrenchment. Finally, we show that the relationship between data size and model size influences the variability and brittleness of these effects, hinting towards a need for holistically balanced models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bunzeck-zarriess-2023-entrenchment">
<titleInfo>
<title>Entrenchment Matters: Investigating Positional and Constructional Sensitivity in Small and Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bastian</namePart>
<namePart type="family">Bunzeck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarrieß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Breitholtz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shalom</namePart>
<namePart type="family">Lappin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharid</namePart>
<namePart type="family">Loaiciga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolai</namePart>
<namePart type="family">Ilinykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The success of large language models (LMs) has also prompted a push towards smaller models, but the differences in functionality and encodings between these two types of models are not yet well understood. In this paper, we employ a perturbed masking approach to investigate differences in token influence patterns on the sequence embeddings of larger and smaller RoBERTa models. Specifically, we explore how token properties like position, length or part of speech influence their sequence embeddings. We find that there is a general tendency for sequence-final tokens to exert a higher influence. Among part-of-speech tags, nouns, numerals and punctuation marks are the most influential, with smaller deviations for individual models. These findings also align with usage-based linguistic evidence on the effect of entrenchment. Finally, we show that the relationship between data size and model size influences the variability and brittleness of these effects, hinting towards a need for holistically balanced models.</abstract>
<identifier type="citekey">bunzeck-zarriess-2023-entrenchment</identifier>
<location>
<url>https://aclanthology.org/2023.clasp-1.3/</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>25</start>
<end>37</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Entrenchment Matters: Investigating Positional and Constructional Sensitivity in Small and Large Language Models
%A Bunzeck, Bastian
%A Zarrieß, Sina
%Y Breitholtz, Ellen
%Y Lappin, Shalom
%Y Loaiciga, Sharid
%Y Ilinykh, Nikolai
%Y Dobnik, Simon
%S Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD)
%D 2023
%8 September
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F bunzeck-zarriess-2023-entrenchment
%X The success of large language models (LMs) has also prompted a push towards smaller models, but the differences in functionality and encodings between these two types of models are not yet well understood. In this paper, we employ a perturbed masking approach to investigate differences in token influence patterns on the sequence embeddings of larger and smaller RoBERTa models. Specifically, we explore how token properties like position, length or part of speech influence their sequence embeddings. We find that there is a general tendency for sequence-final tokens to exert a higher influence. Among part-of-speech tags, nouns, numerals and punctuation marks are the most influential, with smaller deviations for individual models. These findings also align with usage-based linguistic evidence on the effect of entrenchment. Finally, we show that the relationship between data size and model size influences the variability and brittleness of these effects, hinting towards a need for holistically balanced models.
%U https://aclanthology.org/2023.clasp-1.3/
%P 25-37
Markdown (Informal)
[Entrenchment Matters: Investigating Positional and Constructional Sensitivity in Small and Large Language Models](https://aclanthology.org/2023.clasp-1.3/) (Bunzeck & Zarrieß, CLASP 2023)
ACL