% Workshop paper in the EACL Hackashop proceedings (ACL Anthology ID 2021.hackashop-1.10).
% Case-protecting braces wrap whole words rather than single letters so that
% kerning and hyphenation are not disturbed; the EACL acronym is protected too.
@inproceedings{repar-shumakov-2021-aligning,
  title     = {Aligning {Estonian} and {Russian} news industry keywords with the help of subtitle translations and an environmental thesaurus},
  author    = {Repar, Andra{\v{z}} and Shumakov, Andrej},
  editor    = {Toivonen, Hannu and Boggia, Michele},
  booktitle = {Proceedings of the {EACL} Hackashop on News Media Content Analysis and Automated Report Generation},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.hackashop-1.10},
  pages     = {71--75},
  abstract  = {This paper presents the implementation of a bilingual term alignment approach developed by Repar et al. (2019) to a dataset of unaligned Estonian and Russian keywords which were manually assigned by journalists to describe the article topic. We started by separating the dataset into Estonian and Russian tags based on whether they are written in the Latin or Cyrillic script. Then we selected the available language-specific resources necessary for the alignment system to work. Despite the domains of the language-specific resources (subtitles and environment) not matching the domain of the dataset (news articles), we were able to achieve respectable results with manual evaluation indicating that almost 3/4 of the aligned keyword pairs are at least partial matches.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey repar-shumakov-2021-aligning. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="repar-shumakov-2021-aligning">
    <!-- Paper title -->
    <titleInfo>
      <title>Aligning Estonian and Russian news industry keywords with the help of subtitle translations and an environmental thesaurus</title>
    </titleInfo>
    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Andraž</namePart>
      <namePart type="family">Repar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrej</namePart>
      <namePart type="family">Shumakov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the EACL Hackashop on News Media Content Analysis and Automated Report Generation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Hannu</namePart>
        <namePart type="family">Toivonen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michele</namePart>
        <namePart type="family">Boggia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents the implementation of a bilingual term alignment approach developed by Repar et al. (2019) to a dataset of unaligned Estonian and Russian keywords which were manually assigned by journalists to describe the article topic. We started by separating the dataset into Estonian and Russian tags based on whether they are written in the Latin or Cyrillic script. Then we selected the available language-specific resources necessary for the alignment system to work. Despite the domains of the language-specific resources (subtitles and environment) not matching the domain of the dataset (news articles), we were able to achieve respectable results with manual evaluation indicating that almost 3/4 of the aligned keyword pairs are at least partial matches.</abstract>
    <identifier type="citekey">repar-shumakov-2021-aligning</identifier>
    <location>
      <url>https://aclanthology.org/2021.hackashop-1.10</url>
    </location>
    <!-- Issue date and page range of the paper -->
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>71</start>
        <end>75</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Aligning Estonian and Russian news industry keywords with the help of subtitle translations and an environmental thesaurus
%A Repar, Andraž
%A Shumakov, Andrej
%Y Toivonen, Hannu
%Y Boggia, Michele
%S Proceedings of the EACL Hackashop on News Media Content Analysis and Automated Report Generation
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F repar-shumakov-2021-aligning
%X This paper presents the implementation of a bilingual term alignment approach developed by Repar et al. (2019) to a dataset of unaligned Estonian and Russian keywords which were manually assigned by journalists to describe the article topic. We started by separating the dataset into Estonian and Russian tags based on whether they are written in the Latin or Cyrillic script. Then we selected the available language-specific resources necessary for the alignment system to work. Despite the domains of the language-specific resources (subtitles and environment) not matching the domain of the dataset (news articles), we were able to achieve respectable results with manual evaluation indicating that almost 3/4 of the aligned keyword pairs are at least partial matches.
%U https://aclanthology.org/2021.hackashop-1.10
%P 71-75
Markdown (Informal)
[Aligning Estonian and Russian news industry keywords with the help of subtitle translations and an environmental thesaurus](https://aclanthology.org/2021.hackashop-1.10) (Repar & Shumakov, Hackashop 2021)
ACL