@inproceedings{hou-etal-2022-syntax,
title = "Syntax-guided Localized Self-attention by Constituency Syntactic Distance",
author = "Hou, Shengyuan and
Kai, Jushi and
Xue, Haotian and
Zhu, Bingyu and
Yuan, Bo and
Huang, Longtao and
Wang, Xinbing and
Lin, Zhouhan",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.173/",
doi = "10.18653/v1/2022.findings-emnlp.173",
pages = "2334--2341",
abstract = "Recent works have revealed that Transformers are implicitly learning the syntactic information in its lower layers from data, albeit is highly dependent on the quality and scale of the training data. However, learning syntactic information from data is not necessary if we can leverage an external syntactic parser, which provides better parsing quality with well-defined syntactic structures. This could potentially improve Transformer`s performance and sample efficiency. In this work, we propose a syntax-guided localized self-attention for Transformer that allows directly incorporating grammar structures from an external constituency parser. It prohibits the attention mechanism to overweight the grammatically distant tokens over close ones. Experimental results show that our model could consistently improve translation performance on a variety of machine translation datasets, ranging from small to large dataset sizes, and with different source languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hou-etal-2022-syntax">
<titleInfo>
<title>Syntax-guided Localized Self-attention by Constituency Syntactic Distance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shengyuan</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jushi</namePart>
<namePart type="family">Kai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haotian</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bingyu</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Longtao</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinbing</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhouhan</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent works have revealed that Transformers implicitly learn syntactic information in their lower layers from data, although this is highly dependent on the quality and scale of the training data. However, learning syntactic information from data is not necessary if we can leverage an external syntactic parser, which provides better parsing quality with well-defined syntactic structures. This could potentially improve Transformer's performance and sample efficiency. In this work, we propose a syntax-guided localized self-attention for Transformer that allows directly incorporating grammar structures from an external constituency parser. It prevents the attention mechanism from overweighting grammatically distant tokens over close ones. Experimental results show that our model consistently improves translation performance on a variety of machine translation datasets, ranging from small to large dataset sizes, and with different source languages.</abstract>
<identifier type="citekey">hou-etal-2022-syntax</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.173</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.173/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>2334</start>
<end>2341</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Syntax-guided Localized Self-attention by Constituency Syntactic Distance
%A Hou, Shengyuan
%A Kai, Jushi
%A Xue, Haotian
%A Zhu, Bingyu
%A Yuan, Bo
%A Huang, Longtao
%A Wang, Xinbing
%A Lin, Zhouhan
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F hou-etal-2022-syntax
%X Recent works have revealed that Transformers implicitly learn syntactic information in their lower layers from data, although this is highly dependent on the quality and scale of the training data. However, learning syntactic information from data is not necessary if we can leverage an external syntactic parser, which provides better parsing quality with well-defined syntactic structures. This could potentially improve Transformer's performance and sample efficiency. In this work, we propose a syntax-guided localized self-attention for Transformer that allows directly incorporating grammar structures from an external constituency parser. It prevents the attention mechanism from overweighting grammatically distant tokens over close ones. Experimental results show that our model consistently improves translation performance on a variety of machine translation datasets, ranging from small to large dataset sizes, and with different source languages.
%R 10.18653/v1/2022.findings-emnlp.173
%U https://aclanthology.org/2022.findings-emnlp.173/
%U https://doi.org/10.18653/v1/2022.findings-emnlp.173
%P 2334-2341
Markdown (Informal)
[Syntax-guided Localized Self-attention by Constituency Syntactic Distance](https://aclanthology.org/2022.findings-emnlp.173/) (Hou et al., Findings 2022)
ACL
- Shengyuan Hou, Jushi Kai, Haotian Xue, Bingyu Zhu, Bo Yuan, Longtao Huang, Xinbing Wang, and Zhouhan Lin. 2022. Syntax-guided Localized Self-attention by Constituency Syntactic Distance. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 2334–2341, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.
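The abstract describes self-attention that is prevented from overweighting grammatically distant tokens over close ones, using syntactic distances from an external constituency parser. Below is a minimal, hypothetical sketch of one way such a localization could look (a hard mask on pairwise constituency syntactic distance); it is not the authors' actual formulation, and the function name, the `window` threshold, and the input shapes are assumptions made for illustration.

```python
import torch
import torch.nn.functional as F

def localized_attention(q, k, v, syn_dist, window=2):
    # q, k, v: (batch, heads, seq, dim)
    # syn_dist: (batch, seq, seq) pairwise constituency syntactic distances,
    #           assumed to come from an external parser (hypothetical format).
    d = q.size(-1)
    scores = torch.matmul(q, k.transpose(-2, -1)) / d ** 0.5      # (B, H, S, S)
    # Localize attention: token pairs beyond the syntactic-distance window get
    # -inf logits, so grammatically distant tokens cannot outweigh close ones.
    mask = (syn_dist > window).unsqueeze(1)                       # broadcast over heads
    scores = scores.masked_fill(mask, float("-inf"))
    weights = F.softmax(scores, dim=-1)
    return torch.matmul(weights, v)

# Example usage with random inputs.
B, H, S, D = 1, 2, 5, 8
q = k = v = torch.randn(B, H, S, D)
syn_dist = torch.randint(0, 4, (B, S, S))
syn_dist.diagonal(dim1=-2, dim2=-1).zero_()   # a token is at distance 0 from itself
out = localized_attention(q, k, v, syn_dist)  # (1, 2, 5, 8)
```

The paper's method integrates the parser-derived structure directly into Transformer self-attention; a hard distance mask as above is just one simple reading of "localized", and softer distance-based biases would be an equally plausible variant.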