@inproceedings{liu-etal-2022-lightweight,
title = "Lightweight Sound Event Detection Model with {R}ep{VGG} Architecture",
author = "Liu, Chia-Chuan and
Huang, Sung-Jen and
Chen, Chia-Ping and
Lu, Chung-Li and
Chan, Bo-Cheng and
Cheng, Yu-Han and
Chuang, Hsiang-Feng and
Chen, Wei-Yu",
editor = "Chang, Yung-Chun and
Huang, Yi-Chin",
booktitle = "Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)",
month = nov,
year = "2022",
address = "Taipei, Taiwan",
publisher = "The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)",
url = "https://aclanthology.org/2022.rocling-1.17/",
pages = "129--135",
language = "zho",
abstract = "In this paper, we proposed RepVGGRNN, which is a lightweight sound event detection model. We use RepVGG convolution blocks in the convolution part to improve performance, and re-parameterize the RepVGG blocks after the model is trained to reduce the parameters of the convolution layers. To further improve the accuracy of the model, we incorporated both the mean teacher method and knowledge distillation to train the lightweight model. The proposed system achieves PSDS (Polyphonic sound event detection score)-scenario 1, 2 of 40.8{\%} and 67.7{\%}, which outperforms the baseline system of 34.4{\%} and 57.2{\%} on the DCASE 2022 Task4 validation dataset. The quantity of the parameters in the proposed system is about 49.6K, only 44.6{\%} of the baseline system."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2022-lightweight">
<titleInfo>
<title>Lightweight Sound Event Detection Model with RepVGG Architecture</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chia-Chuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sung-Jen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chia-Ping</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chung-Li</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bo-Cheng</namePart>
<namePart type="family">Chan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu-Han</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsiang-Feng</namePart>
<namePart type="family">Chuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei-Yu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">zho</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yung-Chun</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi-Chin</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we proposed RepVGGRNN, which is a lightweight sound event detection model. We use RepVGG convolution blocks in the convolution part to improve performance, and re-parameterize the RepVGG blocks after the model is trained to reduce the parameters of the convolution layers. To further improve the accuracy of the model, we incorporated both the mean teacher method and knowledge distillation to train the lightweight model. The proposed system achieves PSDS (Polyphonic sound event detection score)-scenario 1, 2 of 40.8% and 67.7%, which outperforms the baseline system of 34.4% and 57.2% on the DCASE 2022 Task4 validation dataset. The quantity of the parameters in the proposed system is about 49.6K, only 44.6% of the baseline system.</abstract>
<identifier type="citekey">liu-etal-2022-lightweight</identifier>
<location>
<url>https://aclanthology.org/2022.rocling-1.17/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>129</start>
<end>135</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lightweight Sound Event Detection Model with RepVGG Architecture
%A Liu, Chia-Chuan
%A Huang, Sung-Jen
%A Chen, Chia-Ping
%A Lu, Chung-Li
%A Chan, Bo-Cheng
%A Cheng, Yu-Han
%A Chuang, Hsiang-Feng
%A Chen, Wei-Yu
%Y Chang, Yung-Chun
%Y Huang, Yi-Chin
%S Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)
%D 2022
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei, Taiwan
%G zho
%F liu-etal-2022-lightweight
%X In this paper, we proposed RepVGGRNN, which is a lightweight sound event detection model. We use RepVGG convolution blocks in the convolution part to improve performance, and re-parameterize the RepVGG blocks after the model is trained to reduce the parameters of the convolution layers. To further improve the accuracy of the model, we incorporated both the mean teacher method and knowledge distillation to train the lightweight model. The proposed system achieves PSDS (Polyphonic sound event detection score)-scenario 1, 2 of 40.8% and 67.7%, which outperforms the baseline system of 34.4% and 57.2% on the DCASE 2022 Task4 validation dataset. The quantity of the parameters in the proposed system is about 49.6K, only 44.6% of the baseline system.
%U https://aclanthology.org/2022.rocling-1.17/
%P 129-135
Markdown (Informal)
[Lightweight Sound Event Detection Model with RepVGG Architecture](https://aclanthology.org/2022.rocling-1.17/) (Liu et al., ROCLING 2022)
ACL
- Chia-Chuan Liu, Sung-Jen Huang, Chia-Ping Chen, Chung-Li Lu, Bo-Cheng Chan, Yu-Han Cheng, Hsiang-Feng Chuang, and Wei-Yu Chen. 2022. Lightweight Sound Event Detection Model with RepVGG Architecture. In Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022), pages 129–135, Taipei, Taiwan. The Association for Computational Linguistics and Chinese Language Processing (ACLCLP).