@inproceedings{zhu-etal-2022-shot,
title = "Few-shot initializing of Active Learner via Meta-Learning",
author = "Zhu, Zi Long and
Yadav, Vikrant and
Afzal, Zubair and
Tsatsaronis, George",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.80",
doi = "10.18653/v1/2022.findings-emnlp.80",
pages = "1117--1133",
abstract = "Despite the important evolutions in few-shot and zero-shot learning techniques, domain specific applications still require expert knowledge and significant effort in annotating and labeling a large volume of unstructured textual data. To mitigate this problem, active learning, and meta-learning attempt to reach a high performance with the least amount of labeled data. In this paper, we introduce a novel approach to combine both lines of work by initializing an active learner with meta-learned parameters obtained through meta-training on tasks similar to the target task during active learning. In this approach we use the pre-trained BERT as our text-encoder and meta-learn its parameters with LEOPARD, which extends the model-agnostic meta-learning method by generating task dependent softmax weights to enable learning across tasks with different number of classes. We demonstrate the effectiveness of our method by performing active learning on five natural language understanding tasks and six datasets with five different acquisition functions. We train two different meta-initializations, and we use the pre-trained BERT base initialization as baseline. We observe that our approach performs better than the baseline at low budget, especially when closely related tasks were present during meta-learning. Moreover, our results show that better performance in the initial phase, i.e., with fewer labeled samples, leads to better performance when larger acquisition batches are used. We also perform an ablation study of the proposed method, showing that active learning with only the meta-learned weights is beneficial and adding the meta-learned learning rates and generating the softmax have negative consequences for the performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2022-shot">
<titleInfo>
<title>Few-shot initializing of Active Learner via Meta-Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zi</namePart>
<namePart type="given">Long</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vikrant</namePart>
<namePart type="family">Yadav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zubair</namePart>
<namePart type="family">Afzal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Tsatsaronis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the important evolutions in few-shot and zero-shot learning techniques, domain specific applications still require expert knowledge and significant effort in annotating and labeling a large volume of unstructured textual data. To mitigate this problem, active learning, and meta-learning attempt to reach a high performance with the least amount of labeled data. In this paper, we introduce a novel approach to combine both lines of work by initializing an active learner with meta-learned parameters obtained through meta-training on tasks similar to the target task during active learning. In this approach we use the pre-trained BERT as our text-encoder and meta-learn its parameters with LEOPARD, which extends the model-agnostic meta-learning method by generating task dependent softmax weights to enable learning across tasks with different number of classes. We demonstrate the effectiveness of our method by performing active learning on five natural language understanding tasks and six datasets with five different acquisition functions. We train two different meta-initializations, and we use the pre-trained BERT base initialization as baseline. We observe that our approach performs better than the baseline at low budget, especially when closely related tasks were present during meta-learning. Moreover, our results show that better performance in the initial phase, i.e., with fewer labeled samples, leads to better performance when larger acquisition batches are used. We also perform an ablation study of the proposed method, showing that active learning with only the meta-learned weights is beneficial and adding the meta-learned learning rates and generating the softmax have negative consequences for the performance.</abstract>
<identifier type="citekey">zhu-etal-2022-shot</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.80</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.80</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>1117</start>
<end>1133</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Few-shot initializing of Active Learner via Meta-Learning
%A Zhu, Zi Long
%A Yadav, Vikrant
%A Afzal, Zubair
%A Tsatsaronis, George
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F zhu-etal-2022-shot
%X Despite the important evolutions in few-shot and zero-shot learning techniques, domain specific applications still require expert knowledge and significant effort in annotating and labeling a large volume of unstructured textual data. To mitigate this problem, active learning, and meta-learning attempt to reach a high performance with the least amount of labeled data. In this paper, we introduce a novel approach to combine both lines of work by initializing an active learner with meta-learned parameters obtained through meta-training on tasks similar to the target task during active learning. In this approach we use the pre-trained BERT as our text-encoder and meta-learn its parameters with LEOPARD, which extends the model-agnostic meta-learning method by generating task dependent softmax weights to enable learning across tasks with different number of classes. We demonstrate the effectiveness of our method by performing active learning on five natural language understanding tasks and six datasets with five different acquisition functions. We train two different meta-initializations, and we use the pre-trained BERT base initialization as baseline. We observe that our approach performs better than the baseline at low budget, especially when closely related tasks were present during meta-learning. Moreover, our results show that better performance in the initial phase, i.e., with fewer labeled samples, leads to better performance when larger acquisition batches are used. We also perform an ablation study of the proposed method, showing that active learning with only the meta-learned weights is beneficial and adding the meta-learned learning rates and generating the softmax have negative consequences for the performance.
%R 10.18653/v1/2022.findings-emnlp.80
%U https://aclanthology.org/2022.findings-emnlp.80
%U https://doi.org/10.18653/v1/2022.findings-emnlp.80
%P 1117-1133
Markdown (Informal)
[Few-shot initializing of Active Learner via Meta-Learning](https://aclanthology.org/2022.findings-emnlp.80) (Zhu et al., Findings 2022)
ACL
- Zi Long Zhu, Vikrant Yadav, Zubair Afzal, and George Tsatsaronis. 2022. Few-shot initializing of Active Learner via Meta-Learning. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 1117–1133, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.