@article{van-der-lee-etal-2023-neural,
title = "Neural Data-to-Text Generation Based on Small Datasets: Comparing the Added Value of Two Semi-Supervised Learning Approaches on Top of a Large Language Model",
author = "van der Lee, Chris and
Castro Ferreira, Thiago and
Emmery, Chris and
Wiltshire, Travis J. and
Krahmer, Emiel",
journal = "Computational Linguistics",
month = sep,
year = "2023",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2023.cl-3.2/",
doi = "10.1162/coli_a_00484",
pages = "555--611",
abstract = "This study discusses the effect of semi-supervised learning in combination with pretrained language models for data-to-text generation. It is not known whether semi-supervised learning is still helpful when a large-scale language model is also supplemented. This study aims to answer this question by comparing a data-to-text system only supplemented with a language model, to two data-to-text systems that are additionally enriched by a data augmentation or a pseudo-labeling semi-supervised learning approach. Results show that semi-supervised learning results in higher scores on diversity metrics. In terms of output quality, extending the training set of a data-to-text system with a language model using the pseudo-labeling approach did increase text quality scores, but the data augmentation approach yielded similar scores to the system without training set extension. These results indicate that semi-supervised learning approaches can bolster output quality and diversity, even when a language model is also present."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="van-der-lee-etal-2023-neural">
<titleInfo>
<title>Neural Data-to-Text Generation Based on Small Datasets: Comparing the Added Value of Two Semi-Supervised Learning Approaches on Top of a Large Language Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">van der Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Castro Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Emmery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Travis</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Wiltshire</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">Krahmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
    <abstract>This study discusses the effect of semi-supervised learning in combination with pretrained language models for data-to-text generation. It is not known whether semi-supervised learning is still helpful when a data-to-text system is also supplemented with a large-scale language model. This study aims to answer this question by comparing a data-to-text system supplemented only with a language model to two data-to-text systems that are additionally enriched by a data augmentation or a pseudo-labeling semi-supervised learning approach. Results show that semi-supervised learning leads to higher scores on diversity metrics. In terms of output quality, extending the training set of the language-model-supplemented data-to-text system via the pseudo-labeling approach did increase text quality scores, but the data augmentation approach yielded scores similar to those of the system without training set extension. These results indicate that semi-supervised learning approaches can bolster output quality and diversity, even when a language model is also present.</abstract>
<identifier type="citekey">van-der-lee-etal-2023-neural</identifier>
<identifier type="doi">10.1162/coli_a_00484</identifier>
<location>
<url>https://aclanthology.org/2023.cl-3.2/</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>555</start>
<end>611</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Neural Data-to-Text Generation Based on Small Datasets: Comparing the Added Value of Two Semi-Supervised Learning Approaches on Top of a Large Language Model
%A van der Lee, Chris
%A Castro Ferreira, Thiago
%A Emmery, Chris
%A Wiltshire, Travis J.
%A Krahmer, Emiel
%J Computational Linguistics
%D 2023
%8 September
%I MIT Press
%C Cambridge, MA
%F van-der-lee-etal-2023-neural
%X This study discusses the effect of semi-supervised learning in combination with pretrained language models for data-to-text generation. It is not known whether semi-supervised learning is still helpful when a data-to-text system is also supplemented with a large-scale language model. This study aims to answer this question by comparing a data-to-text system supplemented only with a language model to two data-to-text systems that are additionally enriched by a data augmentation or a pseudo-labeling semi-supervised learning approach. Results show that semi-supervised learning leads to higher scores on diversity metrics. In terms of output quality, extending the training set of the language-model-supplemented data-to-text system via the pseudo-labeling approach did increase text quality scores, but the data augmentation approach yielded scores similar to those of the system without training set extension. These results indicate that semi-supervised learning approaches can bolster output quality and diversity, even when a language model is also present.
%R 10.1162/coli_a_00484
%U https://aclanthology.org/2023.cl-3.2/
%U https://doi.org/10.1162/coli_a_00484
%P 555-611
Markdown (Informal)
[Neural Data-to-Text Generation Based on Small Datasets: Comparing the Added Value of Two Semi-Supervised Learning Approaches on Top of a Large Language Model](https://aclanthology.org/2023.cl-3.2/) (van der Lee et al., CL 2023)
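
The abstract above describes a pseudo-labeling setup: a data-to-text model trained on the small labeled set generates text for unlabeled inputs, and its confident outputs are folded back into the training data before retraining. The following is a minimal sketch of that loop, assuming a generic data-to-text setting; the `Example` structure, the `train` callable, and the confidence threshold are illustrative assumptions, not the authors' implementation.

```python
# Minimal sketch of pseudo-labeling for data-to-text generation.
# All names and the confidence threshold are illustrative; this is
# not the paper's code.

from dataclasses import dataclass
from typing import Callable

@dataclass
class Example:
    data: str   # structured input (e.g., a linearized table)
    text: str   # target verbalization

def pseudo_label(
    labeled: list[Example],
    unlabeled_data: list[str],
    train: Callable[[list[Example]], Callable[[str], tuple[str, float]]],
    threshold: float = 0.9,  # assumed cutoff for keeping a prediction
) -> list[Example]:
    """Extend `labeled` with confident predictions on `unlabeled_data`.

    `train` fits a data-to-text model and returns a predictor mapping
    an input to (generated text, confidence score).
    """
    model = train(labeled)              # seed model on the small labeled set
    extended = list(labeled)
    for data in unlabeled_data:
        text, confidence = model(data)
        if confidence >= threshold:     # keep only confident outputs
            extended.append(Example(data, text))
    return extended                     # retrain the system on this set
```

In the paper's comparison, the system retrained on the extended set is evaluated against a language-model-only baseline and a data-augmentation variant.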