@inproceedings{gemmell-dalton-2023-toolwriter,
title = "{T}ool{W}riter: Question Specific Tool Synthesis for Tabular Data",
author = "Gemmell, Carlos and
Dalton, Jeff",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.1003/",
doi = "10.18653/v1/2023.emnlp-main.1003",
pages = "16137--16148",
abstract = "Tabular question answering (TQA) presents a challenging setting for neural systems by requiring joint reasoning of natural language with large amounts of semi-structured data. Unlike humans who use programmatic tools like filters to transform data before processing, language models in TQA process tables directly, resulting in information loss as table size increases. In this paper we propose ToolWriter to generate query specific programs and detect when to apply them to transform tables and align them with the TQA model`s capabilities. Focusing Toolwriter to generate row-filtering tools improves the state-of-the-art for WikiTableQuestions and WikiSQL with the most performance gained on long tables. By investigating headroom, our work highlights the broader potential for programmatic tools combined with neural components to manipulate large amounts of structured data."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gemmell-dalton-2023-toolwriter">
<titleInfo>
<title>ToolWriter: Question Specific Tool Synthesis for Tabular Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Gemmell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeff</namePart>
<namePart type="family">Dalton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Tabular question answering (TQA) presents a challenging setting for neural systems by requiring joint reasoning of natural language with large amounts of semi-structured data. Unlike humans who use programmatic tools like filters to transform data before processing, language models in TQA process tables directly, resulting in information loss as table size increases. In this paper we propose ToolWriter to generate query specific programs and detect when to apply them to transform tables and align them with the TQA model‘s capabilities. Focusing Toolwriter to generate row-filtering tools improves the state-of-the-art for WikiTableQuestions and WikiSQL with the most performance gained on long tables. By investigating headroom, our work highlights the broader potential for programmatic tools combined with neural components to manipulate large amounts of structured data.</abstract>
<identifier type="citekey">gemmell-dalton-2023-toolwriter</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.1003</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.1003/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>16137</start>
<end>16148</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ToolWriter: Question Specific Tool Synthesis for Tabular Data
%A Gemmell, Carlos
%A Dalton, Jeff
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F gemmell-dalton-2023-toolwriter
%X Tabular question answering (TQA) presents a challenging setting for neural systems by requiring joint reasoning of natural language with large amounts of semi-structured data. Unlike humans who use programmatic tools like filters to transform data before processing, language models in TQA process tables directly, resulting in information loss as table size increases. In this paper we propose ToolWriter to generate query specific programs and detect when to apply them to transform tables and align them with the TQA model‘s capabilities. Focusing Toolwriter to generate row-filtering tools improves the state-of-the-art for WikiTableQuestions and WikiSQL with the most performance gained on long tables. By investigating headroom, our work highlights the broader potential for programmatic tools combined with neural components to manipulate large amounts of structured data.
%R 10.18653/v1/2023.emnlp-main.1003
%U https://aclanthology.org/2023.emnlp-main.1003/
%U https://doi.org/10.18653/v1/2023.emnlp-main.1003
%P 16137-16148
Markdown (Informal)
[ToolWriter: Question Specific Tool Synthesis for Tabular Data](https://aclanthology.org/2023.emnlp-main.1003/) (Gemmell & Dalton, EMNLP 2023)
ACL