@inproceedings{akomeah-etal-2021-ur,
title = "{UR}@{NLP}{\_}{A}{\_}{T}eam @ {G}erm{E}val 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments",
author = "Akomeah, Kwabena Odame and
Kruschwitz, Udo and
Ludwig, Bernd",
editor = "Risch, Julian and
Stoll, Anke and
Wilms, Lena and
Wiegand, Michael",
booktitle = "Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments",
month = sep,
year = "2021",
address = "Duesseldorf, Germany",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.germeval-1.14/",
pages = "95--99",
abstract = "In this paper, we report on our approach to addressing the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments for the German language. We submitted three runs for each subtask based on ensembles of three models each using contextual embeddings from pre-trained language models using SVM and neural-network-based classifiers. We include language-specific as well as language-agnostic language models {--} both with and without fine-tuning. We observe that for the runs we submitted that the SVM models overfitted the training data and this affected the aggregation method (simple majority voting) of the ensembles. The model records a lower performance on the test set than on the training set. Exploring the issue of overfitting we uncovered that due to a bug in the pipeline the runs we submitted had not been trained on the full set but only on a small training set. Therefore in this paper we also include the results we get when trained on the full training set which demonstrate the power of ensembles."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="akomeah-etal-2021-ur">
<titleInfo>
<title>UR@NLP_A_Team @ GermEval 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kwabena</namePart>
<namePart type="given">Odame</namePart>
<namePart type="family">Akomeah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Kruschwitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernd</namePart>
<namePart type="family">Ludwig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Risch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anke</namePart>
<namePart type="family">Stoll</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lena</namePart>
<namePart type="family">Wilms</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Wiegand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Duesseldorf, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we report on our approach to the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments for the German language. We submitted three runs for each subtask, each based on an ensemble of three models that use contextual embeddings from pre-trained language models with SVM and neural-network-based classifiers. We include language-specific as well as language-agnostic language models – both with and without fine-tuning. We observe that, for the runs we submitted, the SVM models overfitted the training data, which affected the aggregation method (simple majority voting) of the ensembles; the models therefore record lower performance on the test set than on the training set. Exploring the issue of overfitting, we uncovered that, due to a bug in the pipeline, the submitted runs had not been trained on the full training set but only on a small subset. Therefore, in this paper we also include the results obtained when training on the full training set, which demonstrate the power of ensembles.</abstract>
<identifier type="citekey">akomeah-etal-2021-ur</identifier>
<location>
<url>https://aclanthology.org/2021.germeval-1.14/</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>95</start>
<end>99</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UR@NLP_A_Team @ GermEval 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments
%A Akomeah, Kwabena Odame
%A Kruschwitz, Udo
%A Ludwig, Bernd
%Y Risch, Julian
%Y Stoll, Anke
%Y Wilms, Lena
%Y Wiegand, Michael
%S Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments
%D 2021
%8 September
%I Association for Computational Linguistics
%C Duesseldorf, Germany
%F akomeah-etal-2021-ur
%X In this paper, we report on our approach to the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments for the German language. We submitted three runs for each subtask, each based on an ensemble of three models that use contextual embeddings from pre-trained language models with SVM and neural-network-based classifiers. We include language-specific as well as language-agnostic language models – both with and without fine-tuning. We observe that, for the runs we submitted, the SVM models overfitted the training data, which affected the aggregation method (simple majority voting) of the ensembles; the models therefore record lower performance on the test set than on the training set. Exploring the issue of overfitting, we uncovered that, due to a bug in the pipeline, the submitted runs had not been trained on the full training set but only on a small subset. Therefore, in this paper we also include the results obtained when training on the full training set, which demonstrate the power of ensembles.
%U https://aclanthology.org/2021.germeval-1.14/
%P 95-99
Markdown (Informal)
[UR@NLP_A_Team @ GermEval 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments](https://aclanthology.org/2021.germeval-1.14/) (Akomeah et al., GermEval 2021)
ACL
Kwabena Odame Akomeah, Udo Kruschwitz, and Bernd Ludwig. 2021. UR@NLP_A_Team @ GermEval 2021: Ensemble-based Classification of Toxic, Engaging and Fact-Claiming Comments. In Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments, pages 95–99, Duesseldorf, Germany. Association for Computational Linguistics.
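
The abstract describes aggregating three classifiers per subtask by simple majority voting. Below is a minimal, self-contained sketch of that general aggregation scheme, not the authors' actual pipeline: the feature extraction (contextual embeddings from pre-trained language models) and the exact classifiers are not reproduced here, and the random features plus the LinearSVC / LogisticRegression / MLPClassifier stand-ins are assumptions purely for illustration.

```python
# Sketch of simple majority voting over three binary classifiers,
# as described at a high level in the abstract. Features and models
# are placeholders, not the authors' actual setup.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC

rng = np.random.default_rng(0)
X_train = rng.normal(size=(200, 16))          # stand-in for comment embeddings
y_train = rng.integers(0, 2, size=200)        # stand-in binary labels (e.g. toxic / not toxic)
X_test = rng.normal(size=(50, 16))

# Three ensemble members (the paper combines SVM and neural-network-based
# classifiers over pre-trained embeddings; these are generic substitutes).
members = [
    LinearSVC(),
    LogisticRegression(max_iter=1000),
    MLPClassifier(hidden_layer_sizes=(32,), max_iter=500),
]
preds = np.stack([m.fit(X_train, y_train).predict(X_test) for m in members])

# Simple majority vote: label 1 if at least 2 of the 3 members predict 1.
ensemble_pred = (preds.sum(axis=0) >= 2).astype(int)
print(ensemble_pred[:10])
```

With a 3-member ensemble, majority voting only flips a prediction when two members disagree with the third, which is why the abstract notes that overfitted SVC-style members can drag down the aggregated result.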