@inproceedings{yoshida-etal-2024-tree,
title = "Tree-Planted Transformers: Unidirectional Transformer Language Models with Implicit Syntactic Supervision",
author = "Yoshida, Ryo and
Someya, Taiga and
Oseki, Yohei",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.303",
doi = "10.18653/v1/2024.findings-acl.303",
pages = "5120--5134",
abstract = "Syntactic Language Models (SLMs) can be trained efficiently to reach relatively high performance; however, they have trouble with inference efficiency due to the explicit generation of syntactic structures. In this paper, we propose a new method dubbed tree-planting: instead of explicitly generating syntactic structures, we {``}plant{''} trees into attention weights of unidirectional Transformer LMs to implicitly reflect syntactic structures of natural language. Specifically, unidirectional Transformer LMs trained with tree-planting will be called Tree-Planted Transformers (TPT), which inherit the training efficiency from SLMs without changing the inference efficiency of their underlying Transformer LMs. Targeted syntactic evaluations on the SyntaxGym benchmark demonstrated that TPTs, despite the lack of explicit generation of syntactic structures, significantly outperformed not only vanilla Transformer LMs but also various SLMs that generate hundreds of syntactic structures in parallel. This result suggests that TPTs can learn human-like syntactic knowledge as data-efficiently as SLMs while maintaining the modeling space of Transformer LMs unchanged.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yoshida-etal-2024-tree">
<titleInfo>
<title>Tree-Planted Transformers: Unidirectional Transformer Language Models with Implicit Syntactic Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryo</namePart>
<namePart type="family">Yoshida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taiga</namePart>
<namePart type="family">Someya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Oseki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Syntactic Language Models (SLMs) can be trained efficiently to reach relatively high performance; however, they have trouble with inference efficiency due to the explicit generation of syntactic structures. In this paper, we propose a new method dubbed tree-planting: instead of explicitly generating syntactic structures, we “plant” trees into attention weights of unidirectional Transformer LMs to implicitly reflect syntactic structures of natural language. Specifically, unidirectional Transformer LMs trained with tree-planting will be called Tree-Planted Transformers (TPT), which inherit the training efficiency from SLMs without changing the inference efficiency of their underlying Transformer LMs. Targeted syntactic evaluations on the SyntaxGym benchmark demonstrated that TPTs, despite the lack of explicit generation of syntactic structures, significantly outperformed not only vanilla Transformer LMs but also various SLMs that generate hundreds of syntactic structures in parallel. This result suggests that TPTs can learn human-like syntactic knowledge as data-efficiently as SLMs while maintaining the modeling space of Transformer LMs unchanged.</abstract>
<identifier type="citekey">yoshida-etal-2024-tree</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.303</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.303</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>5120</start>
<end>5134</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tree-Planted Transformers: Unidirectional Transformer Language Models with Implicit Syntactic Supervision
%A Yoshida, Ryo
%A Someya, Taiga
%A Oseki, Yohei
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F yoshida-etal-2024-tree
%X Syntactic Language Models (SLMs) can be trained efficiently to reach relatively high performance; however, they have trouble with inference efficiency due to the explicit generation of syntactic structures. In this paper, we propose a new method dubbed tree-planting: instead of explicitly generating syntactic structures, we “plant” trees into attention weights of unidirectional Transformer LMs to implicitly reflect syntactic structures of natural language. Specifically, unidirectional Transformer LMs trained with tree-planting will be called Tree-Planted Transformers (TPT), which inherit the training efficiency from SLMs without changing the inference efficiency of their underlying Transformer LMs. Targeted syntactic evaluations on the SyntaxGym benchmark demonstrated that TPTs, despite the lack of explicit generation of syntactic structures, significantly outperformed not only vanilla Transformer LMs but also various SLMs that generate hundreds of syntactic structures in parallel. This result suggests that TPTs can learn human-like syntactic knowledge as data-efficiently as SLMs while maintaining the modeling space of Transformer LMs unchanged.
%R 10.18653/v1/2024.findings-acl.303
%U https://aclanthology.org/2024.findings-acl.303
%U https://doi.org/10.18653/v1/2024.findings-acl.303
%P 5120-5134
Markdown (Informal)
[Tree-Planted Transformers: Unidirectional Transformer Language Models with Implicit Syntactic Supervision](https://aclanthology.org/2024.findings-acl.303) (Yoshida et al., Findings 2024)
ACL