@inproceedings{nguyen-etal-2024-text,
title = "Do Text-to-Vis Benchmarks Test Real Use of Visualisations?",
author = "Nguyen, Hy and
He, Xuefei and
Reeson, Andrew and
Paris, Cecile and
Poon, Josiah and
Kummerfeld, Jonathan K.",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.423/",
doi = "10.18653/v1/2024.emnlp-main.423",
pages = "7433--7441",
abstract = "Large language models are able to generate code for visualisations in response to simple user requests.This is a useful application and an appealing one for NLP research because plots of data provide grounding for language.However, there are relatively few benchmarks, and those that exist may not be representative of what users do in practice.This paper investigates whether benchmarks reflect real-world use through an empirical study comparing benchmark datasets with code from public repositories.Our findings reveal a substantial gap, with evaluations not testing the same distribution of chart types, attributes, and actions as real-world examples.One dataset is representative, but requires extensive modification to become a practical end-to-end benchmark. This shows that new benchmarks are needed to support the development of systems that truly address users' visualisation needs.These observations will guide future data creation, highlighting which features hold genuine significance for users."
}
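For reference, a minimal sketch of loading the BibTeX record above programmatically, assuming the third-party bibtexparser package (v1 API) is installed and the entry is saved as nguyen2024.bib (an illustrative filename):

import bibtexparser  # assumed dependency: pip install bibtexparser (v1 API)
from bibtexparser.bparser import BibTexParser

# common_strings=True resolves month macros such as the unquoted "nov" above.
parser = BibTexParser(common_strings=True)
with open("nguyen2024.bib") as f:
    db = bibtexparser.load(f, parser=parser)

entry = db.entries[0]
print(entry["ID"])     # nguyen-etal-2024-text
print(entry["title"])  # Do Text-to-Vis Benchmarks Test Real Use of Visualisations?
print(entry["pages"])  # 7433--7441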
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2024-text">
<titleInfo>
<title>Do Text-to-Vis Benchmarks Test Real Use of Visualisations?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hy</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuefei</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Reeson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cecile</namePart>
<namePart type="family">Paris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josiah</namePart>
<namePart type="family">Poon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Kummerfeld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models are able to generate code for visualisations in response to simple user requests. This is a useful application and an appealing one for NLP research because plots of data provide grounding for language. However, there are relatively few benchmarks, and those that exist may not be representative of what users do in practice. This paper investigates whether benchmarks reflect real-world use through an empirical study comparing benchmark datasets with code from public repositories. Our findings reveal a substantial gap, with evaluations not testing the same distribution of chart types, attributes, and actions as real-world examples. One dataset is representative, but requires extensive modification to become a practical end-to-end benchmark. This shows that new benchmarks are needed to support the development of systems that truly address users’ visualisation needs. These observations will guide future data creation, highlighting which features hold genuine significance for users.</abstract>
<identifier type="citekey">nguyen-etal-2024-text</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.423</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.423/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>7433</start>
<end>7441</end>
</extent>
</part>
</mods>
</modsCollection>
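A minimal sketch of reading the MODS record above with Python's standard library; the namespace URI is the one declared on the modsCollection element, and the filename is illustrative:

import xml.etree.ElementTree as ET

# MODS v3 namespace, as declared on <modsCollection> above.
NS = {"m": "http://www.loc.gov/mods/v3"}

tree = ET.parse("nguyen2024.mods.xml")  # illustrative filename
mods = tree.getroot().find("m:mods", NS)

title = mods.find("m:titleInfo/m:title", NS).text
# Only direct <name> children of <mods> are authors; editors sit
# under <relatedItem> and are therefore skipped by this lookup.
authors = [
    " ".join(part.text for part in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
    if name.find("m:role/m:roleTerm", NS).text == "author"
]
print(title)
print(authors)  # ['Hy Nguyen', 'Xuefei He', ..., 'Jonathan K Kummerfeld']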
%0 Conference Proceedings
%T Do Text-to-Vis Benchmarks Test Real Use of Visualisations?
%A Nguyen, Hy
%A He, Xuefei
%A Reeson, Andrew
%A Paris, Cecile
%A Poon, Josiah
%A Kummerfeld, Jonathan K.
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F nguyen-etal-2024-text
%X Large language models are able to generate code for visualisations in response to simple user requests. This is a useful application and an appealing one for NLP research because plots of data provide grounding for language. However, there are relatively few benchmarks, and those that exist may not be representative of what users do in practice. This paper investigates whether benchmarks reflect real-world use through an empirical study comparing benchmark datasets with code from public repositories. Our findings reveal a substantial gap, with evaluations not testing the same distribution of chart types, attributes, and actions as real-world examples. One dataset is representative, but requires extensive modification to become a practical end-to-end benchmark. This shows that new benchmarks are needed to support the development of systems that truly address users’ visualisation needs. These observations will guide future data creation, highlighting which features hold genuine significance for users.
%R 10.18653/v1/2024.emnlp-main.423
%U https://aclanthology.org/2024.emnlp-main.423/
%U https://doi.org/10.18653/v1/2024.emnlp-main.423
%P 7433-7441
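The %-prefixed record above follows the Refer/Endnote tagged format: each line carries a two-character tag (%0 reference type, %T title, %A author, %Y editor, %D year, and so on) followed by a space and a value, with repeated tags accumulating into lists. A minimal sketch of a parser under those assumptions:

from collections import defaultdict

def parse_refer(text: str) -> dict:
    """Parse a Refer/Endnote tagged record into a tag -> list-of-values map."""
    record = defaultdict(list)
    for line in text.splitlines():
        # Tag is the first two characters; the value starts after the space.
        if line.startswith("%") and len(line) > 3:
            tag, value = line[:2], line[3:]
            record[tag].append(value)
    return dict(record)

# Example with a fragment of the record above:
rec = parse_refer("""%0 Conference Proceedings
%T Do Text-to-Vis Benchmarks Test Real Use of Visualisations?
%A Nguyen, Hy
%A He, Xuefei
%D 2024""")
print(rec["%T"][0])  # the title
print(rec["%A"])     # ['Nguyen, Hy', 'He, Xuefei']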
[Do Text-to-Vis Benchmarks Test Real Use of Visualisations?](https://aclanthology.org/2024.emnlp-main.423/) (Nguyen et al., EMNLP 2024)
Hy Nguyen, Xuefei He, Andrew Reeson, Cecile Paris, Josiah Poon, and Jonathan K. Kummerfeld. 2024. Do Text-to-Vis Benchmarks Test Real Use of Visualisations? In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 7433–7441, Miami, Florida, USA. Association for Computational Linguistics.