% EMNLP 2023 paper introducing the MingOfficial dataset; record taken from the ACL Anthology.
% Title words that must keep their capitalisation are brace-protected as whole words.
@inproceedings{chen-etal-2023-mingofficial,
  title     = {{MingOfficial}: A {Ming} Official Career Dataset and a Historical Context-Aware Representation Learning Framework},
  author    = {Chen, You-Jun and
               Hsieh, Hsin-Yi and
               Lin, Yu and
               Tian, Yingtao and
               Chan, Bert and
               Liu, Yu-Sin and
               Lin, Yi-Hsuan and
               Tsai, Richard},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.266/},
  doi       = {10.18653/v1/2023.emnlp-main.266},
  pages     = {4380--4401},
  abstract  = {In Chinese studies, understanding the nuanced traits of historical figures, often not explicitly evident in biographical data, has been a key interest. However, identifying these traits can be challenging due to the need for domain expertise, specialist knowledge, and context-specific insights, making the process time-consuming and difficult to scale. Our focus on studying officials from China's Ming Dynasty is no exception. To tackle this challenge, we propose MingOfficial, a large-scale multi-modal dataset consisting of both structured (career records, annotated personnel types) and text (historical texts) data for 9,376 officials. We further couple the dataset with a graph neural network (GNN) to combine both modalities in order to allow investigation of social structures and provide features to boost down-stream tasks. Experiments show that our proposed MingOfficial could enable exploratory analysis of official identities, and also significantly boost performance in tasks such as identifying nuance identities (e.g. civil officials holding military power) from 24.6\% to 98.2\% F$_1$ score in hold-out test set. By making MingOfficial publicly available (see main text for the URL) as both a dataset and an interactive tool, we aim to stimulate further research into the role of social context and representation learning in identifying individual characteristics, and hope to provide inspiration for computational approaches in other fields beyond Chinese studies.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2023-mingofficial">
<titleInfo>
<title>MingOfficial: A Ming Official Career Dataset and a Historical Context-Aware Representation Learning Framework</title>
</titleInfo>
<name type="personal">
<namePart type="given">You-Jun</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Yi</namePart>
<namePart type="family">Hsieh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingtao</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bert</namePart>
<namePart type="family">Chan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu-Sin</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi-Hsuan</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Tsai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In Chinese studies, understanding the nuanced traits of historical figures, often not explicitly evident in biographical data, has been a key interest. However, identifying these traits can be challenging due to the need for domain expertise, specialist knowledge, and context-specific insights, making the process time-consuming and difficult to scale. Our focus on studying officials from China’s Ming Dynasty is no exception. To tackle this challenge, we propose MingOfficial, a large-scale multi-modal dataset consisting of both structured (career records, annotated personnel types) and text (historical texts) data for 9,376 officials. We further couple the dataset with a graph neural network (GNN) to combine both modalities in order to allow investigation of social structures and provide features to boost down-stream tasks. Experiments show that our proposed MingOfficial could enable exploratory analysis of official identities, and also significantly boost performance in tasks such as identifying nuance identities (e.g. civil officials holding military power) from 24.6% to 98.2% F₁ score in hold-out test set. By making MingOfficial publicly available (see main text for the URL) as both a dataset and an interactive tool, we aim to stimulate further research into the role of social context and representation learning in identifying individual characteristics, and hope to provide inspiration for computational approaches in other fields beyond Chinese studies.</abstract>
<identifier type="citekey">chen-etal-2023-mingofficial</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.266</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.266/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>4380</start>
<end>4401</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MingOfficial: A Ming Official Career Dataset and a Historical Context-Aware Representation Learning Framework
%A Chen, You-Jun
%A Hsieh, Hsin-Yi
%A Lin, Yu
%A Tian, Yingtao
%A Chan, Bert
%A Liu, Yu-Sin
%A Lin, Yi-Hsuan
%A Tsai, Richard
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F chen-etal-2023-mingofficial
%X In Chinese studies, understanding the nuanced traits of historical figures, often not explicitly evident in biographical data, has been a key interest. However, identifying these traits can be challenging due to the need for domain expertise, specialist knowledge, and context-specific insights, making the process time-consuming and difficult to scale. Our focus on studying officials from China’s Ming Dynasty is no exception. To tackle this challenge, we propose MingOfficial, a large-scale multi-modal dataset consisting of both structured (career records, annotated personnel types) and text (historical texts) data for 9,376 officials. We further couple the dataset with a graph neural network (GNN) to combine both modalities in order to allow investigation of social structures and provide features to boost down-stream tasks. Experiments show that our proposed MingOfficial could enable exploratory analysis of official identities, and also significantly boost performance in tasks such as identifying nuance identities (e.g. civil officials holding military power) from 24.6% to 98.2% F₁ score in hold-out test set. By making MingOfficial publicly available (see main text for the URL) as both a dataset and an interactive tool, we aim to stimulate further research into the role of social context and representation learning in identifying individual characteristics, and hope to provide inspiration for computational approaches in other fields beyond Chinese studies.
%R 10.18653/v1/2023.emnlp-main.266
%U https://aclanthology.org/2023.emnlp-main.266/
%U https://doi.org/10.18653/v1/2023.emnlp-main.266
%P 4380-4401
Markdown (Informal)
[MingOfficial: A Ming Official Career Dataset and a Historical Context-Aware Representation Learning Framework](https://aclanthology.org/2023.emnlp-main.266/) (Chen et al., EMNLP 2023)
ACL