@inproceedings{boldsen-etal-2022-interpreting,
title = "Interpreting Character Embeddings With Perceptual Representations: The Case of Shape, Sound, and Color",
author = "Boldsen, Sidsel and
Agirrezabal, Manex and
Hollenstein, Nora",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.470",
doi = "10.18653/v1/2022.acl-long.470",
pages = "6819--6836",
abstract = "Character-level information is included in many NLP models, but evaluating the information encoded in character representations is an open issue. We leverage perceptual representations in the form of shape, sound, and color embeddings and perform a representational similarity analysis to evaluate their correlation with textual representations in five languages. This cross-lingual analysis shows that textual character representations correlate strongly with sound representations for languages using an alphabetic script, while shape correlates with featural scripts. We further develop a set of probing classifiers to intrinsically evaluate what phonological information is encoded in character embeddings. Our results suggest that information on features such as voicing are embedded in both LSTM and transformer-based representations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="boldsen-etal-2022-interpreting">
<titleInfo>
<title>Interpreting Character Embeddings With Perceptual Representations: The Case of Shape, Sound, and Color</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sidsel</namePart>
<namePart type="family">Boldsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manex</namePart>
<namePart type="family">Agirrezabal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Hollenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Character-level information is included in many NLP models, but evaluating the information encoded in character representations is an open issue. We leverage perceptual representations in the form of shape, sound, and color embeddings and perform a representational similarity analysis to evaluate their correlation with textual representations in five languages. This cross-lingual analysis shows that textual character representations correlate strongly with sound representations for languages using an alphabetic script, while shape correlates with featural scripts. We further develop a set of probing classifiers to intrinsically evaluate what phonological information is encoded in character embeddings. Our results suggest that information on features such as voicing are embedded in both LSTM and transformer-based representations.</abstract>
<identifier type="citekey">boldsen-etal-2022-interpreting</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.470</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.470</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>6819</start>
<end>6836</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Interpreting Character Embeddings With Perceptual Representations: The Case of Shape, Sound, and Color
%A Boldsen, Sidsel
%A Agirrezabal, Manex
%A Hollenstein, Nora
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F boldsen-etal-2022-interpreting
%X Character-level information is included in many NLP models, but evaluating the information encoded in character representations is an open issue. We leverage perceptual representations in the form of shape, sound, and color embeddings and perform a representational similarity analysis to evaluate their correlation with textual representations in five languages. This cross-lingual analysis shows that textual character representations correlate strongly with sound representations for languages using an alphabetic script, while shape correlates with featural scripts. We further develop a set of probing classifiers to intrinsically evaluate what phonological information is encoded in character embeddings. Our results suggest that information on features such as voicing are embedded in both LSTM and transformer-based representations.
%R 10.18653/v1/2022.acl-long.470
%U https://aclanthology.org/2022.acl-long.470
%U https://doi.org/10.18653/v1/2022.acl-long.470
%P 6819-6836
Markdown (Informal)
[Interpreting Character Embeddings With Perceptual Representations: The Case of Shape, Sound, and Color](https://aclanthology.org/2022.acl-long.470) (Boldsen et al., ACL 2022)
ACL