@inproceedings{sakamoto-aizawa-2023-predicting,
title = "Predicting Numerals in Text Using Nearest Neighbor Language Models",
author = "Sakamoto, Taku and
Aizawa, Akiko",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.295",
doi = "10.18653/v1/2023.findings-acl.295",
pages = "4795--4809",
abstract = "Commonsense about quantitative properties is essential for a deep understanding of texts containing numerals. However, naive language models (LMs) treat numerals as string tokens; therefore, they lack an understanding of the magnitudes of numerals, resulting in a difficulty in acquiring the commonsense. In this study, we apply the $k$-nearest neighbor LM ($k$NN-LM) to the masked numeral prediction (MNP) task, which measures the quantitative commonsense of LMs.$k$NN-LM extends pre-trained neural LMs with the $k$-nearest neighbor ($k$NN) search.Since it can utilize patterns that appear in the datastore for prediction, we expect an improvement in numeral prediction accuracy, which is associated with a high rate of occurrence of out-of-vocabulary (OOV) words.Through experiments, we verified that the retrieval-based method is effective for fine-grained predictions of numerals from context, especially for the OOV numerals.We also compared two different context spans for context representations to improve the accuracy of $k$NN search by using only the words that are closely related to the masked numeral: the mask and its surrounding words, and the mask and its subsequent words.Our results reveal that using only the embeddings of mask tokens for numerals in $k$NN search is the most effective approach for realizing MNP tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sakamoto-aizawa-2023-predicting">
    <titleInfo>
      <title>Predicting Numerals in Text Using Nearest Neighbor Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Taku</namePart>
      <namePart type="family">Sakamoto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Akiko</namePart>
      <namePart type="family">Aizawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Boyd-Graber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoaki</namePart>
        <namePart type="family">Okazaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Commonsense about quantitative properties is essential for a deep understanding of texts containing numerals. However, naive language models (LMs) treat numerals as string tokens; therefore, they lack an understanding of the magnitudes of numerals, resulting in a difficulty in acquiring the commonsense. In this study, we apply the k-nearest neighbor LM (kNN-LM) to the masked numeral prediction (MNP) task, which measures the quantitative commonsense of LMs. kNN-LM extends pre-trained neural LMs with the k-nearest neighbor (kNN) search. Since it can utilize patterns that appear in the datastore for prediction, we expect an improvement in numeral prediction accuracy, which is associated with a high rate of occurrence of out-of-vocabulary (OOV) words. Through experiments, we verified that the retrieval-based method is effective for fine-grained predictions of numerals from context, especially for the OOV numerals. We also compared two different context spans for context representations to improve the accuracy of kNN search by using only the words that are closely related to the masked numeral: the mask and its surrounding words, and the mask and its subsequent words. Our results reveal that using only the embeddings of mask tokens for numerals in kNN search is the most effective approach for realizing MNP tasks.</abstract>
    <identifier type="citekey">sakamoto-aizawa-2023-predicting</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-acl.295</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-acl.295</url>
    </location>
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>4795</start>
        <end>4809</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Predicting Numerals in Text Using Nearest Neighbor Language Models
%A Sakamoto, Taku
%A Aizawa, Akiko
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F sakamoto-aizawa-2023-predicting
%X Commonsense about quantitative properties is essential for a deep understanding of texts containing numerals. However, naive language models (LMs) treat numerals as string tokens; therefore, they lack an understanding of the magnitudes of numerals, resulting in a difficulty in acquiring the commonsense. In this study, we apply the k-nearest neighbor LM (kNN-LM) to the masked numeral prediction (MNP) task, which measures the quantitative commonsense of LMs. kNN-LM extends pre-trained neural LMs with the k-nearest neighbor (kNN) search. Since it can utilize patterns that appear in the datastore for prediction, we expect an improvement in numeral prediction accuracy, which is associated with a high rate of occurrence of out-of-vocabulary (OOV) words. Through experiments, we verified that the retrieval-based method is effective for fine-grained predictions of numerals from context, especially for the OOV numerals. We also compared two different context spans for context representations to improve the accuracy of kNN search by using only the words that are closely related to the masked numeral: the mask and its surrounding words, and the mask and its subsequent words. Our results reveal that using only the embeddings of mask tokens for numerals in kNN search is the most effective approach for realizing MNP tasks.
%R 10.18653/v1/2023.findings-acl.295
%U https://aclanthology.org/2023.findings-acl.295
%U https://doi.org/10.18653/v1/2023.findings-acl.295
%P 4795-4809
Markdown (Informal)
[Predicting Numerals in Text Using Nearest Neighbor Language Models](https://aclanthology.org/2023.findings-acl.295) (Sakamoto & Aizawa, Findings 2023)
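The method named in the abstract, kNN-LM, augments a pre-trained language model by interpolating its output distribution with a distribution induced by k-nearest-neighbor retrieval over a datastore of (context embedding, target token) pairs. Below is a minimal NumPy sketch of that interpolation; the toy inputs, function names, and the interpolation weight `lambda_` are illustrative assumptions, not the paper's actual models, data, or hyperparameters.

```python
# A minimal, self-contained sketch of kNN-LM-style interpolation for masked
# numeral prediction. All inputs here are hypothetical stand-ins, not the
# paper's implementation.
import numpy as np

def knn_distribution(query, keys, values, vocab_size, k=8, temperature=1.0):
    """Turn the k nearest datastore entries into a distribution over tokens.

    query:  (d,) embedding of the masked context.
    keys:   (n, d) stored context embeddings.
    values: (n,) token ids of the numerals observed in those contexts.
    """
    dists = np.linalg.norm(keys - query, axis=1)      # L2 distance to every key
    nearest = np.argsort(dists)[:k]                   # indices of the k closest keys
    weights = np.exp(-dists[nearest] / temperature)   # closer neighbors weigh more
    weights /= weights.sum()
    p_knn = np.zeros(vocab_size)
    for idx, w in zip(nearest, weights):
        p_knn[values[idx]] += w                       # aggregate weight per token id
    return p_knn

def interpolate(p_lm, p_knn, lambda_=0.5):
    """kNN-LM prediction: mix the base LM and retrieval distributions."""
    return (1.0 - lambda_) * p_lm + lambda_ * p_knn

# Toy usage with random stand-ins for real embeddings and LM outputs.
rng = np.random.default_rng(0)
vocab_size, d, n = 100, 16, 1000
keys = rng.normal(size=(n, d))                        # datastore context embeddings
values = rng.integers(0, vocab_size, size=n)          # stored numeral token ids
query = rng.normal(size=d)                            # embedding of the masked position
p_lm = rng.dirichlet(np.ones(vocab_size))             # base LM's output distribution
p = interpolate(p_lm, knn_distribution(query, keys, values, vocab_size))
print("predicted numeral token id:", int(p.argmax()))
```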