% BibTeX record for the UNLP 2023 workshop paper on extending the Multi30K dataset to Ukrainian.
% Case-sensitive words in the title and booktitle are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
% NOTE(review): `address` appears to hold the workshop venue rather than the publisher's city;
% confirm this is what the target bibliography style expects.
@inproceedings{saichyshyna-etal-2023-extension,
  title     = {Extension {Multi30K}: Multimodal Dataset for Integrated Vision and Language Research in {Ukrainian}},
  author    = {Saichyshyna, Nataliia and
               Maksymenko, Daniil and
               Turuta, Oleksii and
               Yerokhin, Andriy and
               Babii, Andrii and
               Turuta, Olena},
  editor    = {Romanyshyn, Mariana},
  booktitle = {Proceedings of the Second {Ukrainian} Natural Language Processing Workshop ({UNLP})},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.unlp-1.7},
  doi       = {10.18653/v1/2023.unlp-1.7},
  pages     = {54--61},
  abstract  = {We share the results of the project within the well-known Multi30k dataset dedicated to improving machine translation of text from English into Ukrainian. The main task was to manually prepare the dataset and improve the translation of texts. The importance of collecting such datasets for low-resource languages for improving the quality of machine translation has been discussed. We also studied the features of translations of words and sentences with ambiguous meanings. The collection of multimodal datasets is essential for natural language processing tasks because it allows the development of more complex and comprehensive machine learning models that can understand and analyze different types of data. These models can learn from a variety of data types, including images, text, and audio, for more accurate and meaningful results.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper identified by citekey saichyshyna-etal-2023-extension. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="saichyshyna-etal-2023-extension">
    <!-- Title of the paper. -->
    <titleInfo>
      <title>Extension Multi30K: Multimodal Dataset for Integrated Vision and Language Research in Ukrainian</title>
    </titleInfo>
    <!-- Authors, in the order they are listed for the paper. -->
    <name type="personal">
      <namePart type="given">Nataliia</namePart>
      <namePart type="family">Saichyshyna</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Daniil</namePart>
      <namePart type="family">Maksymenko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Oleksii</namePart>
      <namePart type="family">Turuta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andriy</namePart>
      <namePart type="family">Yerokhin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrii</namePart>
      <namePart type="family">Babii</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Olena</namePart>
      <namePart type="family">Turuta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editor, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mariana</namePart>
        <namePart type="family">Romanyshyn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We share the results of the project within the well-known Multi30k dataset dedicated to improving machine translation of text from English into Ukrainian. The main task was to manually prepare the dataset and improve the translation of texts. The importance of collecting such datasets for low-resource languages for improving the quality of machine translation has been discussed. We also studied the features of translations of words and sentences with ambiguous meanings. The collection of multimodal datasets is essential for natural language processing tasks because it allows the development of more complex and comprehensive machine learning models that can understand and analyze different types of data. These models can learn from a variety of data types, including images, text, and audio, for more accurate and meaningful results.</abstract>
    <!-- Identifiers and the landing-page URL. -->
    <identifier type="citekey">saichyshyna-etal-2023-extension</identifier>
    <identifier type="doi">10.18653/v1/2023.unlp-1.7</identifier>
    <location>
      <url>https://aclanthology.org/2023.unlp-1.7</url>
    </location>
    <!-- Date and page extent of this paper. -->
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>54</start>
        <end>61</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Extension Multi30K: Multimodal Dataset for Integrated Vision and Language Research in Ukrainian
%A Saichyshyna, Nataliia
%A Maksymenko, Daniil
%A Turuta, Oleksii
%A Yerokhin, Andriy
%A Babii, Andrii
%A Turuta, Olena
%Y Romanyshyn, Mariana
%S Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F saichyshyna-etal-2023-extension
%X We share the results of the project within the well-known Multi30k dataset dedicated to improving machine translation of text from English into Ukrainian. The main task was to manually prepare the dataset and improve the translation of texts. The importance of collecting such datasets for low-resource languages for improving the quality of machine translation has been discussed. We also studied the features of translations of words and sentences with ambiguous meanings. The collection of multimodal datasets is essential for natural language processing tasks because it allows the development of more complex and comprehensive machine learning models that can understand and analyze different types of data. These models can learn from a variety of data types, including images, text, and audio, for more accurate and meaningful results.
%R 10.18653/v1/2023.unlp-1.7
%U https://aclanthology.org/2023.unlp-1.7
%U https://doi.org/10.18653/v1/2023.unlp-1.7
%P 54-61
Markdown (Informal)
[Extension Multi30K: Multimodal Dataset for Integrated Vision and Language Research in Ukrainian](https://aclanthology.org/2023.unlp-1.7) (Saichyshyna et al., UNLP 2023)
ACL