@inproceedings{chizhikova-etal-2022-sinai,
title = "{SINAI}@{SMM}4{H}{'}22: Transformers for biomedical social media text mining in {S}panish",
author = "Chizhikova, Mariia and
L{\'o}pez-{\'U}beda, Pilar and
D{\'\i}az-Galiano, Manuel C. and
Ure{\~n}a-L{\'o}pez, L. Alfonso and
Mart{\'\i}n-Valdivia, M. Teresa",
editor = "Gonzalez-Hernandez, Graciela and
Weissenbacher, Davy",
booktitle = "Proceedings of The Seventh Workshop on Social Media Mining for Health Applications, Workshop {\&} Shared Task",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.smm4h-1.8",
pages = "27--30",
abstract = "This paper covers participation of the SINAI team in Tasks 5 and 10 of the Social Media Mining for Health ({\#}SSM4H) workshop at COLING-2022. These tasks focus on leveraging Twitter posts written in Spanish for healthcare research. The objective of Task 5 was to classify tweets reporting COVID-19 symptoms, while Task 10 required identifying disease mentions in Twitter posts. The presented systems explore large RoBERTa language models pre-trained on Twitter data in the case of tweet classification task and general-domain data for the disease recognition task. We also present a text pre-processing methodology implemented in both systems and describe an initial weakly-supervised fine-tuning phase alongside with a submission post-processing procedure designed for Task 10. The systems obtained 0.84 F1-score on the Task 5 and 0.77 F1-score on Task 10.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chizhikova-etal-2022-sinai">
<titleInfo>
<title>SINAI@SMM4H’22: Transformers for biomedical social media text mining in Spanish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariia</namePart>
<namePart type="family">Chizhikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pilar</namePart>
<namePart type="family">López-Úbeda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Díaz-Galiano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">L</namePart>
<namePart type="given">Alfonso</namePart>
<namePart type="family">Ureña-López</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="given">Teresa</namePart>
<namePart type="family">Martín-Valdivia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The Seventh Workshop on Social Media Mining for Health Applications, Workshop & Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Davy</namePart>
<namePart type="family">Weissenbacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper covers participation of the SINAI team in Tasks 5 and 10 of the Social Media Mining for Health (#SSM4H) workshop at COLING-2022. These tasks focus on leveraging Twitter posts written in Spanish for healthcare research. The objective of Task 5 was to classify tweets reporting COVID-19 symptoms, while Task 10 required identifying disease mentions in Twitter posts. The presented systems explore large RoBERTa language models pre-trained on Twitter data in the case of tweet classification task and general-domain data for the disease recognition task. We also present a text pre-processing methodology implemented in both systems and describe an initial weakly-supervised fine-tuning phase alongside with a submission post-processing procedure designed for Task 10. The systems obtained 0.84 F1-score on the Task 5 and 0.77 F1-score on Task 10.</abstract>
<identifier type="citekey">chizhikova-etal-2022-sinai</identifier>
<location>
<url>https://aclanthology.org/2022.smm4h-1.8</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>27</start>
<end>30</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SINAI@SMM4H’22: Transformers for biomedical social media text mining in Spanish
%A Chizhikova, Mariia
%A López-Úbeda, Pilar
%A Díaz-Galiano, Manuel C.
%A Ureña-López, L. Alfonso
%A Martín-Valdivia, M. Teresa
%Y Gonzalez-Hernandez, Graciela
%Y Weissenbacher, Davy
%S Proceedings of The Seventh Workshop on Social Media Mining for Health Applications, Workshop & Shared Task
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F chizhikova-etal-2022-sinai
%X This paper covers participation of the SINAI team in Tasks 5 and 10 of the Social Media Mining for Health (#SSM4H) workshop at COLING-2022. These tasks focus on leveraging Twitter posts written in Spanish for healthcare research. The objective of Task 5 was to classify tweets reporting COVID-19 symptoms, while Task 10 required identifying disease mentions in Twitter posts. The presented systems explore large RoBERTa language models pre-trained on Twitter data in the case of tweet classification task and general-domain data for the disease recognition task. We also present a text pre-processing methodology implemented in both systems and describe an initial weakly-supervised fine-tuning phase alongside with a submission post-processing procedure designed for Task 10. The systems obtained 0.84 F1-score on the Task 5 and 0.77 F1-score on Task 10.
%U https://aclanthology.org/2022.smm4h-1.8
%P 27-30
Markdown (Informal)
[SINAI@SMM4H’22: Transformers for biomedical social media text mining in Spanish](https://aclanthology.org/2022.smm4h-1.8) (Chizhikova et al., SMM4H 2022)
ACL
- Mariia Chizhikova, Pilar López-Úbeda, Manuel C. Díaz-Galiano, L. Alfonso Ureña-López, and M. Teresa Martín-Valdivia. 2022. SINAI@SMM4H’22: Transformers for biomedical social media text mining in Spanish. In Proceedings of The Seventh Workshop on Social Media Mining for Health Applications, Workshop & Shared Task, pages 27–30, Gyeongju, Republic of Korea. Association for Computational Linguistics.