@inproceedings{guo-etal-2023-isotropy,
title = "Isotropy-Enhanced Conditional Masked Language Models",
author = "Guo, Pei and
Xiao, Yisheng and
Li, Juntao and
Ji, Yixin and
Zhang, Min",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.555/",
doi = "10.18653/v1/2023.findings-emnlp.555",
pages = "8278--8289",
abstract = "Non-autoregressive models have been widely used for various text generation tasks to accelerate the inference process but at the cost of generation quality to some extent. To achieve a good balance between inference speedup and generation quality, iterative NAR models like CMLM and Disco are proposed. Researchers have made much follow-up progress based on them, and some recent iterative models can achieve very promising performance while maintaining significant speedup. In this paper, we give more insights into iterative NAR models by exploring the anisotropic problem, i.e., the representations of distinct predicted target tokens are similar and indiscriminative. Upon the confirmation of the anisotropic problem in iterative NAR models, we first analyze the effectiveness of the contrastive learning method and further propose the Look Neighbors strategy to enhance the learning of token representations during training. Experiments on 4 WMT datasets show that our methods consistently improve the performance as well as alleviate the anisotropic problem of the conditional masked language model, even outperforming the current SoTA result on WMT14 EN $\rightarrow$ DE."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="guo-etal-2023-isotropy">
<titleInfo>
<title>Isotropy-Enhanced Conditional Masked Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pei</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yisheng</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Non-autoregressive models have been widely used for various text generation tasks to accelerate the inference process but at the cost of generation quality to some extent. To achieve a good balance between inference speedup and generation quality, iterative NAR models like CMLM and Disco are proposed. Researchers have made much follow-up progress based on them, and some recent iterative models can achieve very promising performance while maintaining significant speedup. In this paper, we give more insights into iterative NAR models by exploring the anisotropic problem, i.e., the representations of distinct predicted target tokens are similar and indiscriminative. Upon the confirmation of the anisotropic problem in iterative NAR models, we first analyze the effectiveness of the contrastive learning method and further propose the Look Neighbors strategy to enhance the learning of token representations during training. Experiments on 4 WMT datasets show that our methods consistently improve the performance as well as alleviate the anisotropic problem of the conditional masked language model, even outperforming the current SoTA result on WMT14 EN → DE.</abstract>
<identifier type="citekey">guo-etal-2023-isotropy</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.555</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.555/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>8278</start>
<end>8289</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Isotropy-Enhanced Conditional Masked Language Models
%A Guo, Pei
%A Xiao, Yisheng
%A Li, Juntao
%A Ji, Yixin
%A Zhang, Min
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F guo-etal-2023-isotropy
%X Non-autoregressive models have been widely used for various text generation tasks to accelerate the inference process but at the cost of generation quality to some extent. To achieve a good balance between inference speedup and generation quality, iterative NAR models like CMLM and Disco are proposed. Researchers have made much follow-up progress based on them, and some recent iterative models can achieve very promising performance while maintaining significant speedup. In this paper, we give more insights into iterative NAR models by exploring the anisotropic problem, i.e., the representations of distinct predicted target tokens are similar and indiscriminative. Upon the confirmation of the anisotropic problem in iterative NAR models, we first analyze the effectiveness of the contrastive learning method and further propose the Look Neighbors strategy to enhance the learning of token representations during training. Experiments on 4 WMT datasets show that our methods consistently improve the performance as well as alleviate the anisotropic problem of the conditional masked language model, even outperforming the current SoTA result on WMT14 EN → DE.
%R 10.18653/v1/2023.findings-emnlp.555
%U https://aclanthology.org/2023.findings-emnlp.555/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.555
%P 8278-8289
Markdown (Informal)
[Isotropy-Enhanced Conditional Masked Language Models](https://aclanthology.org/2023.findings-emnlp.555/) (Guo et al., Findings 2023)
ACL
- Pei Guo, Yisheng Xiao, Juntao Li, Yixin Ji, and Min Zhang. 2023. Isotropy-Enhanced Conditional Masked Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 8278–8289, Singapore. Association for Computational Linguistics.