@inproceedings{simoulin-crabbe-2022-unifying,
title = "Unifying Parsing and Tree-Structured Models for Generating Sentence Semantic Representations",
author = "Simoulin, Antoine and
Crabb{\'e}, Benoit",
editor = "Ippolito, Daphne and
Li, Liunian Harold and
Pacheco, Maria Leonor and
Chen, Danqi and
Xue, Nianwen",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-srw.33/",
doi = "10.18653/v1/2022.naacl-srw.33",
pages = "267--276",
abstract = "We introduce a novel tree-based model that learns its composition function together with its structure. The architecture produces sentence embeddings by composing words according to an induced syntactic tree. The parsing and the composition functions are explicitly connected and, therefore, learned jointly. As a result, the sentence embedding is computed according to an interpretable linguistic pattern and may be used on any downstream task. We evaluate our encoder on downstream tasks, and we observe that it outperforms tree-based models relying on external parsers. In some configurations, it is even competitive with Bert base model. Our model is capable of supporting multiple parser architectures. We exploit this property to conduct an ablation study by comparing different parser initializations. We explore to which extent the trees produced by our model compare with linguistic structures and how this initialization impacts downstream performances. We empirically observe that downstream supervision troubles producing stable parses and preserving linguistically relevant structures."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="simoulin-crabbe-2022-unifying">
<titleInfo>
<title>Unifying Parsing and Tree-Structured Models for Generating Sentence Semantic Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Simoulin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benoit</namePart>
<namePart type="family">Crabbé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daphne</namePart>
<namePart type="family">Ippolito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liunian</namePart>
<namePart type="given">Harold</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Leonor</namePart>
<namePart type="family">Pacheco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danqi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce a novel tree-based model that learns its composition function together with its structure. The architecture produces sentence embeddings by composing words according to an induced syntactic tree. The parsing and the composition functions are explicitly connected and, therefore, learned jointly. As a result, the sentence embedding is computed according to an interpretable linguistic pattern and may be used on any downstream task. We evaluate our encoder on downstream tasks, and we observe that it outperforms tree-based models relying on external parsers. In some configurations, it is even competitive with Bert base model. Our model is capable of supporting multiple parser architectures. We exploit this property to conduct an ablation study by comparing different parser initializations. We explore to which extent the trees produced by our model compare with linguistic structures and how this initialization impacts downstream performances. We empirically observe that downstream supervision troubles producing stable parses and preserving linguistically relevant structures.</abstract>
<identifier type="citekey">simoulin-crabbe-2022-unifying</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-srw.33</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-srw.33/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>267</start>
<end>276</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unifying Parsing and Tree-Structured Models for Generating Sentence Semantic Representations
%A Simoulin, Antoine
%A Crabbé, Benoit
%Y Ippolito, Daphne
%Y Li, Liunian Harold
%Y Pacheco, Maria Leonor
%Y Chen, Danqi
%Y Xue, Nianwen
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Student Research Workshop
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F simoulin-crabbe-2022-unifying
%X We introduce a novel tree-based model that learns its composition function together with its structure. The architecture produces sentence embeddings by composing words according to an induced syntactic tree. The parsing and the composition functions are explicitly connected and, therefore, learned jointly. As a result, the sentence embedding is computed according to an interpretable linguistic pattern and may be used on any downstream task. We evaluate our encoder on downstream tasks, and we observe that it outperforms tree-based models relying on external parsers. In some configurations, it is even competitive with Bert base model. Our model is capable of supporting multiple parser architectures. We exploit this property to conduct an ablation study by comparing different parser initializations. We explore to which extent the trees produced by our model compare with linguistic structures and how this initialization impacts downstream performances. We empirically observe that downstream supervision troubles producing stable parses and preserving linguistically relevant structures.
%R 10.18653/v1/2022.naacl-srw.33
%U https://aclanthology.org/2022.naacl-srw.33/
%U https://doi.org/10.18653/v1/2022.naacl-srw.33
%P 267-276
Markdown (Informal)
[Unifying Parsing and Tree-Structured Models for Generating Sentence Semantic Representations](https://aclanthology.org/2022.naacl-srw.33/) (Simoulin & Crabbé, NAACL 2022)
ACL