@inproceedings{derby-etal-2020-analysing,
    title = "Analysing Word Representation from the Input and Output Embeddings in Neural Network Language Models",
    author = "Derby, Steven and
      Miller, Paul and
      Devereux, Barry",
    editor = "Fern{\'a}ndez, Raquel and
      Linzen, Tal",
    booktitle = "Proceedings of the 24th Conference on Computational Natural Language Learning",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.conll-1.36",
    doi = "10.18653/v1/2020.conll-1.36",
    pages = "442--454",
    abstract = "Researchers have recently demonstrated that tying the neural weights between the input look-up table and the output classification layer can improve training and lower perplexity on sequence learning tasks such as language modelling. Such a procedure is possible due to the design of the softmax classification layer, which previous work has shown to comprise a viable set of semantic representations for the model vocabulary, and these output embeddings are known to perform well on word similarity benchmarks. In this paper, we make meaningful comparisons between the input and output embeddings and other SOTA distributional models to gain a better understanding of the types of information they represent. We also construct a new set of word embeddings using the output embeddings to create locally-optimal approximations for the intermediate representations from the language model. These locally-optimal embeddings demonstrate excellent performance across all our evaluations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="derby-etal-2020-analysing">
    <titleInfo>
        <title>Analysing Word Representation from the Input and Output Embeddings in Neural Network Language Models</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Derby</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Miller</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Barry</namePart>
        <namePart type="family">Devereux</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 24th Conference on Computational Natural Language Learning</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Raquel</namePart>
            <namePart type="family">Fernández</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Tal</namePart>
            <namePart type="family">Linzen</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Online</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Researchers have recently demonstrated that tying the neural weights between the input look-up table and the output classification layer can improve training and lower perplexity on sequence learning tasks such as language modelling. Such a procedure is possible due to the design of the softmax classification layer, which previous work has shown to comprise a viable set of semantic representations for the model vocabulary, and these output embeddings are known to perform well on word similarity benchmarks. In this paper, we make meaningful comparisons between the input and output embeddings and other SOTA distributional models to gain a better understanding of the types of information they represent. We also construct a new set of word embeddings using the output embeddings to create locally-optimal approximations for the intermediate representations from the language model. These locally-optimal embeddings demonstrate excellent performance across all our evaluations.</abstract>
    <identifier type="citekey">derby-etal-2020-analysing</identifier>
    <identifier type="doi">10.18653/v1/2020.conll-1.36</identifier>
    <location>
        <url>https://aclanthology.org/2020.conll-1.36</url>
    </location>
    <part>
        <date>2020-11</date>
        <extent unit="page">
            <start>442</start>
            <end>454</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analysing Word Representation from the Input and Output Embeddings in Neural Network Language Models
%A Derby, Steven
%A Miller, Paul
%A Devereux, Barry
%Y Fernández, Raquel
%Y Linzen, Tal
%S Proceedings of the 24th Conference on Computational Natural Language Learning
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F derby-etal-2020-analysing
%X Researchers have recently demonstrated that tying the neural weights between the input look-up table and the output classification layer can improve training and lower perplexity on sequence learning tasks such as language modelling. Such a procedure is possible due to the design of the softmax classification layer, which previous work has shown to comprise a viable set of semantic representations for the model vocabulary, and these output embeddings are known to perform well on word similarity benchmarks. In this paper, we make meaningful comparisons between the input and output embeddings and other SOTA distributional models to gain a better understanding of the types of information they represent. We also construct a new set of word embeddings using the output embeddings to create locally-optimal approximations for the intermediate representations from the language model. These locally-optimal embeddings demonstrate excellent performance across all our evaluations.
%R 10.18653/v1/2020.conll-1.36
%U https://aclanthology.org/2020.conll-1.36
%U https://doi.org/10.18653/v1/2020.conll-1.36
%P 442-454
Markdown (Informal)
[Analysing Word Representation from the Input and Output Embeddings in Neural Network Language Models](https://aclanthology.org/2020.conll-1.36) (Derby et al., CoNLL 2020)
ACL
Steven Derby, Paul Miller, and Barry Devereux. 2020. Analysing Word Representation from the Input and Output Embeddings in Neural Network Language Models. In Proceedings of the 24th Conference on Computational Natural Language Learning, pages 442–454, Online. Association for Computational Linguistics.