@inproceedings{zaitova-etal-2023-microsyntactic,
title = "Microsyntactic Unit Detection Using Word Embedding Models: Experiments on {S}lavic Languages",
author = "Zaitova, Iuliia and
Stenger, Irina and
Avgustinova, Tania",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.134",
pages = "1265--1273",
abstract = "Microsyntactic units have been defined as language-specific transitional entities between lexicon and grammar, whose idiomatic properties are closely tied to syntax. These units are typically described based on individual constructions, making it difficult to understand them comprehensively as a class. This study proposes a novel approach to detect microsyntactic units using Word Embedding Models (WEMs) trained on six Slavic languages, namely Belarusian, Bulgarian, Czech, Polish, Russian, and Ukrainian, and evaluates how well these models capture the nuances of syntactic non-compositionality. To evaluate the models, we develop a cross-lingual inventory of microsyntactic units using the lists of microsyntantic units available at the Russian National Corpus. Our results demonstrate the effectiveness of WEMs in capturing microsyntactic units across all six Slavic languages under analysis. Additionally, we find that WEMs tailored for syntax-based tasks consistently outperform other WEMs at the task. Our findings contribute to the theory of microsyntax by providing insights into the detection of microsyntactic units and their cross-linguistic properties.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zaitova-etal-2023-microsyntactic">
<titleInfo>
<title>Microsyntactic Unit Detection Using Word Embedding Models: Experiments on Slavic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iuliia</namePart>
<namePart type="family">Zaitova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Stenger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tania</namePart>
<namePart type="family">Avgustinova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Microsyntactic units have been defined as language-specific transitional entities between lexicon and grammar, whose idiomatic properties are closely tied to syntax. These units are typically described based on individual constructions, making it difficult to understand them comprehensively as a class. This study proposes a novel approach to detect microsyntactic units using Word Embedding Models (WEMs) trained on six Slavic languages, namely Belarusian, Bulgarian, Czech, Polish, Russian, and Ukrainian, and evaluates how well these models capture the nuances of syntactic non-compositionality. To evaluate the models, we develop a cross-lingual inventory of microsyntactic units using the lists of microsyntantic units available at the Russian National Corpus. Our results demonstrate the effectiveness of WEMs in capturing microsyntactic units across all six Slavic languages under analysis. Additionally, we find that WEMs tailored for syntax-based tasks consistently outperform other WEMs at the task. Our findings contribute to the theory of microsyntax by providing insights into the detection of microsyntactic units and their cross-linguistic properties.</abstract>
<identifier type="citekey">zaitova-etal-2023-microsyntactic</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.134</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>1265</start>
<end>1273</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Microsyntactic Unit Detection Using Word Embedding Models: Experiments on Slavic Languages
%A Zaitova, Iuliia
%A Stenger, Irina
%A Avgustinova, Tania
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F zaitova-etal-2023-microsyntactic
%X Microsyntactic units have been defined as language-specific transitional entities between lexicon and grammar, whose idiomatic properties are closely tied to syntax. These units are typically described based on individual constructions, making it difficult to understand them comprehensively as a class. This study proposes a novel approach to detect microsyntactic units using Word Embedding Models (WEMs) trained on six Slavic languages, namely Belarusian, Bulgarian, Czech, Polish, Russian, and Ukrainian, and evaluates how well these models capture the nuances of syntactic non-compositionality. To evaluate the models, we develop a cross-lingual inventory of microsyntactic units using the lists of microsyntantic units available at the Russian National Corpus. Our results demonstrate the effectiveness of WEMs in capturing microsyntactic units across all six Slavic languages under analysis. Additionally, we find that WEMs tailored for syntax-based tasks consistently outperform other WEMs at the task. Our findings contribute to the theory of microsyntax by providing insights into the detection of microsyntactic units and their cross-linguistic properties.
%U https://aclanthology.org/2023.ranlp-1.134
%P 1265-1273
Markdown (Informal)
[Microsyntactic Unit Detection Using Word Embedding Models: Experiments on Slavic Languages](https://aclanthology.org/2023.ranlp-1.134) (Zaitova et al., RANLP 2023)
ACL