@inproceedings{xu-etal-2022-joint,
title = "Joint Generation of Captions and Subtitles with Dual Decoding",
author = "Xu, Jitao and
Buet, Fran{\c{c}}ois and
Crego, Josep and
Bertin-Lem{\'e}e, Elise and
Yvon, Fran{\c{c}}ois",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Costa-juss{\`a}, Marta",
booktitle = "Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)",
month = may,
year = "2022",
address = "Dublin, Ireland (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.iwslt-1.7/",
doi = "10.18653/v1/2022.iwslt-1.7",
pages = "74--82",
abstract = "As the amount of audio-visual content increases, the need to develop automatic captioning and subtitling solutions to match the expectations of a growing international audience appears as the only viable way to boost throughput and lower the related post-production costs. Automatic captioning and subtitling often need to be tightly intertwined to achieve an appropriate level of consistency and synchronization with each other and with the video signal. In this work, we assess a dual decoding scheme to achieve a strong coupling between these two tasks and show how adequacy and consistency are increased, with virtually no additional cost in terms of model size and training complexity."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2022-joint">
<titleInfo>
<title>Joint Generation of Captions and Subtitles with Dual Decoding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jitao</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Buet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josep</namePart>
<namePart type="family">Crego</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elise</namePart>
<namePart type="family">Bertin-Lemée</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Yvon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As the amount of audio-visual content increases, the need to develop automatic captioning and subtitling solutions to match the expectations of a growing international audience appears as the only viable way to boost throughput and lower the related post-production costs. Automatic captioning and subtitling often need to be tightly intertwined to achieve an appropriate level of consistency and synchronization with each other and with the video signal. In this work, we assess a dual decoding scheme to achieve a strong coupling between these two tasks and show how adequacy and consistency are increased, with virtually no additional cost in terms of model size and training complexity.</abstract>
<identifier type="citekey">xu-etal-2022-joint</identifier>
<identifier type="doi">10.18653/v1/2022.iwslt-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.iwslt-1.7/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>74</start>
<end>82</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Joint Generation of Captions and Subtitles with Dual Decoding
%A Xu, Jitao
%A Buet, François
%A Crego, Josep
%A Bertin-Lemée, Elise
%A Yvon, François
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Costa-jussà, Marta
%S Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland (in-person and online)
%F xu-etal-2022-joint
%X As the amount of audio-visual content increases, the need to develop automatic captioning and subtitling solutions to match the expectations of a growing international audience appears as the only viable way to boost throughput and lower the related post-production costs. Automatic captioning and subtitling often need to be tightly intertwined to achieve an appropriate level of consistency and synchronization with each other and with the video signal. In this work, we assess a dual decoding scheme to achieve a strong coupling between these two tasks and show how adequacy and consistency are increased, with virtually no additional cost in terms of model size and training complexity.
%R 10.18653/v1/2022.iwslt-1.7
%U https://aclanthology.org/2022.iwslt-1.7/
%U https://doi.org/10.18653/v1/2022.iwslt-1.7
%P 74-82
Markdown (Informal):
[Joint Generation of Captions and Subtitles with Dual Decoding](https://aclanthology.org/2022.iwslt-1.7/) (Xu et al., IWSLT 2022)

ACL:
Jitao Xu, François Buet, Josep Crego, Elise Bertin-Lemée, and François Yvon. 2022. Joint Generation of Captions and Subtitles with Dual Decoding. In Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022), pages 74–82, Dublin, Ireland (in-person and online). Association for Computational Linguistics.