@inproceedings{bhaisaheb-etal-2023-program,
title = "Program Synthesis for Complex {QA} on Charts via Probabilistic Grammar Based Filtered Iterative Back-Translation",
author = "Bhaisaheb, Shabbirhussain and
Paliwal, Shubham and
Patil, Rajaswa and
Patwardhan, Manasi and
Vig, Lovekesh and
Shroff, Gautam",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.189/",
doi = "10.18653/v1/2023.findings-eacl.189",
pages = "2501--2515",
abstract = "Answering complex reasoning questions from chart images is a challenging problem requiring a combination of natural language understanding, fine-grained perception, and analytical reasoning. Current chart-based Question Answering (QA) approaches largely address structural, visual or simple data retrieval-type questions with fixed-vocabulary answers and perform poorly on reasoning queries. We focus on answering realistic, complex, reasoning-based questions where the answer needs to be computed and not selected from a fixed set of choices. Our approach employs a neural semantic parser to transform Natural Language (NL) questions into SQL programs and execute them on a standardized schema populated from the extracted chart contents. In the absence of program annotations, i.e., in a weak supervision setting, we obtain initial SQL predictions from a pre-trained CodeT5 semantic parser and employ Filtered Iterative Back-Translation (FIBT) for iteratively augmenting our NL-SQL training set. The forward (neural semantic parser) and backward (language model) models are initially trained with an external NL-SQL dataset. We iteratively move towards the NL query distribution by generating NL questions from the synthesized SQL programs using a Probabilistic Context-Free Grammar (PCFG) where the production rule probabilities are induced to be inversely proportional to the probabilities in the training data. We filter out the generated NL queries with mismatched structures and compositions. Our FIBT approach achieves State-of-the-Art (SOTA) results on reasoning-based queries in the PlotQA dataset yielding a test accuracy of 60.44{\%}, superseding the previous baselines by a large margin."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhaisaheb-etal-2023-program">
<titleInfo>
<title>Program Synthesis for Complex QA on Charts via Probabilistic Grammar Based Filtered Iterative Back-Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shabbirhussain</namePart>
<namePart type="family">Bhaisaheb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Paliwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajaswa</namePart>
<namePart type="family">Patil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manasi</namePart>
<namePart type="family">Patwardhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lovekesh</namePart>
<namePart type="family">Vig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gautam</namePart>
<namePart type="family">Shroff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Answering complex reasoning questions from chart images is a challenging problem requiring a combination of natural language understanding, fine-grained perception, and analytical reasoning. Current chart-based Question Answering (QA) approaches largely address structural, visual or simple data retrieval-type questions with fixed-vocabulary answers and perform poorly on reasoning queries. We focus on answering realistic, complex, reasoning-based questions where the answer needs to be computed and not selected from a fixed set of choices. Our approach employs a neural semantic parser to transform Natural Language (NL) questions into SQL programs and execute them on a standardized schema populated from the extracted chart contents. In the absence of program annotations, i.e., in a weak supervision setting, we obtain initial SQL predictions from a pre-trained CodeT5 semantic parser and employ Filtered Iterative Back-Translation (FIBT) for iteratively augmenting our NL-SQL training set. The forward (neural semantic parser) and backward (language model) models are initially trained with an external NL-SQL dataset. We iteratively move towards the NL query distribution by generating NL questions from the synthesized SQL programs using a Probabilistic Context-Free Grammar (PCFG) where the production rule probabilities are induced to be inversely proportional to the probabilities in the training data. We filter out the generated NL queries with mismatched structures and compositions. Our FIBT approach achieves State-of-the-Art (SOTA) results on reasoning-based queries in the PlotQA dataset yielding a test accuracy of 60.44%, superseding the previous baselines by a large margin.</abstract>
<identifier type="citekey">bhaisaheb-etal-2023-program</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.189</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.189/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2501</start>
<end>2515</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Program Synthesis for Complex QA on Charts via Probabilistic Grammar Based Filtered Iterative Back-Translation
%A Bhaisaheb, Shabbirhussain
%A Paliwal, Shubham
%A Patil, Rajaswa
%A Patwardhan, Manasi
%A Vig, Lovekesh
%A Shroff, Gautam
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F bhaisaheb-etal-2023-program
%X Answering complex reasoning questions from chart images is a challenging problem requiring a combination of natural language understanding, fine-grained perception, and analytical reasoning. Current chart-based Question Answering (QA) approaches largely address structural, visual or simple data retrieval-type questions with fixed-vocabulary answers and perform poorly on reasoning queries. We focus on answering realistic, complex, reasoning-based questions where the answer needs to be computed and not selected from a fixed set of choices. Our approach employs a neural semantic parser to transform Natural Language (NL) questions into SQL programs and execute them on a standardized schema populated from the extracted chart contents. In the absence of program annotations, i.e., in a weak supervision setting, we obtain initial SQL predictions from a pre-trained CodeT5 semantic parser and employ Filtered Iterative Back-Translation (FIBT) for iteratively augmenting our NL-SQL training set. The forward (neural semantic parser) and backward (language model) models are initially trained with an external NL-SQL dataset. We iteratively move towards the NL query distribution by generating NL questions from the synthesized SQL programs using a Probabilistic Context-Free Grammar (PCFG) where the production rule probabilities are induced to be inversely proportional to the probabilities in the training data. We filter out the generated NL queries with mismatched structures and compositions. Our FIBT approach achieves State-of-the-Art (SOTA) results on reasoning-based queries in the PlotQA dataset yielding a test accuracy of 60.44%, superseding the previous baselines by a large margin.
%R 10.18653/v1/2023.findings-eacl.189
%U https://aclanthology.org/2023.findings-eacl.189/
%U https://doi.org/10.18653/v1/2023.findings-eacl.189
%P 2501-2515
Markdown (Informal)
[Program Synthesis for Complex QA on Charts via Probabilistic Grammar Based Filtered Iterative Back-Translation](https://aclanthology.org/2023.findings-eacl.189/) (Bhaisaheb et al., Findings 2023)
ACL