% Workshop paper; bibliographic data as published in the ACL Anthology (see url/doi).
% Braces inside title/booktitle protect proper nouns and acronyms from the
% sentence-casing that many bibliography styles apply.
@inproceedings{tian-kubler-2021-period,
    author    = {Tian, Zuoyu and
                 K{\"u}bler, Sandra},
    title     = {Period Classification in {Chinese} Historical Texts},
    editor    = {Degaetano-Ortlieb, Stefania and
                 Kazantseva, Anna and
                 Reiter, Nils and
                 Szpakowicz, Stan},
    booktitle = {Proceedings of the 5th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
    month     = nov,
    year      = {2021},
    address   = {Punta Cana, Dominican Republic (online)},
    publisher = {Association for Computational Linguistics},
    pages     = {168--177},
    doi       = {10.18653/v1/2021.latechclfl-1.19},
    url       = {https://aclanthology.org/2021.latechclfl-1.19},
    abstract  = {In this study, we study language change in Chinese Biji by using a classification task: classifying Ancient Chinese texts by time periods. Specifically, we focus on a unique genre in classical Chinese literature: Biji (literally {``}notebook{''} or {``}brush notes{''}), i.e., collections of anecdotes, quotations, etc., anything authors consider noteworthy, Biji span hundreds of years across many dynasties and conserve informal language in written form. For these reasons, they are regarded as a good resource for investigating language change in Chinese (Fang, 2010). In this paper, we create a new dataset of 108 Biji across four dynasties. Based on the dataset, we first introduce a time period classification task for Chinese. Then we investigate different feature representation methods for classification. The results show that models using contextualized embeddings perform best. An analysis of the top features chosen by the word n-gram model (after bleaching proper nouns) confirms that these features are informative and correspond to observations and assumptions made by historical linguists.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for a single workshop paper (citekey: tian-kubler-2021-period). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="tian-kubler-2021-period">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Period Classification in Chinese Historical Texts</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Zuoyu</namePart>
      <namePart type="family">Tian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sandra</namePart>
      <namePart type="family">Kübler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper, with its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Stefania</namePart>
        <namePart type="family">Degaetano-Ortlieb</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Kazantseva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nils</namePart>
        <namePart type="family">Reiter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stan</namePart>
        <namePart type="family">Szpakowicz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- Publisher and place of the proceedings -->
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic (online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this study, we study language change in Chinese Biji by using a classification task: classifying Ancient Chinese texts by time periods. Specifically, we focus on a unique genre in classical Chinese literature: Biji (literally “notebook” or “brush notes”), i.e., collections of anecdotes, quotations, etc., anything authors consider noteworthy, Biji span hundreds of years across many dynasties and conserve informal language in written form. For these reasons, they are regarded as a good resource for investigating language change in Chinese (Fang, 2010). In this paper, we create a new dataset of 108 Biji across four dynasties. Based on the dataset, we first introduce a time period classification task for Chinese. Then we investigate different feature representation methods for classification. The results show that models using contextualized embeddings perform best. An analysis of the top features chosen by the word n-gram model (after bleaching proper nouns) confirms that these features are informative and correspond to observations and assumptions made by historical linguists.</abstract>
    <!-- Identifiers and canonical location -->
    <identifier type="citekey">tian-kubler-2021-period</identifier>
    <identifier type="doi">10.18653/v1/2021.latechclfl-1.19</identifier>
    <location>
      <url>https://aclanthology.org/2021.latechclfl-1.19</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>168</start>
        <end>177</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Period Classification in Chinese Historical Texts
%A Tian, Zuoyu
%A Kübler, Sandra
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic (online)
%F tian-kubler-2021-period
%X In this study, we study language change in Chinese Biji by using a classification task: classifying Ancient Chinese texts by time periods. Specifically, we focus on a unique genre in classical Chinese literature: Biji (literally “notebook” or “brush notes”), i.e., collections of anecdotes, quotations, etc., anything authors consider noteworthy, Biji span hundreds of years across many dynasties and conserve informal language in written form. For these reasons, they are regarded as a good resource for investigating language change in Chinese (Fang, 2010). In this paper, we create a new dataset of 108 Biji across four dynasties. Based on the dataset, we first introduce a time period classification task for Chinese. Then we investigate different feature representation methods for classification. The results show that models using contextualized embeddings perform best. An analysis of the top features chosen by the word n-gram model (after bleaching proper nouns) confirms that these features are informative and correspond to observations and assumptions made by historical linguists.
%R 10.18653/v1/2021.latechclfl-1.19
%U https://aclanthology.org/2021.latechclfl-1.19
%U https://doi.org/10.18653/v1/2021.latechclfl-1.19
%P 168-177
Markdown (Informal)
[Period Classification in Chinese Historical Texts](https://aclanthology.org/2021.latechclfl-1.19) (Tian & Kübler, LaTeCHCLfL 2021)
ACL
- Zuoyu Tian and Sandra Kübler. 2021. Period Classification in Chinese Historical Texts. In Proceedings of the 5th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 168–177, Punta Cana, Dominican Republic (online). Association for Computational Linguistics.