% Conference paper in the LREC-COLING 2024 proceedings (ACL Anthology ID 2024.lrec-main.800).
@inproceedings{wang-etal-2024-intent,
  author    = {Wang, Haiyang and
               Tian, Zhiliang and
               Song, Xin and
               Zhang, Yue and
               Pan, Yuchen and
               Tu, Hongkui and
               Huang, Minlie and
               Zhou, Bin},
  title     = {Intent-Aware and Hate-Mitigating Counterspeech Generation via Dual-Discriminator Guided {LLM}s},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  year      = {2024},
  month     = may,
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {9131--9142},
  url       = {https://aclanthology.org/2024.lrec-main.800/},
  abstract  = {Counterspeech is an effective way to combat online hate speech. Considering the multifaceted nature of online hate speech, counterspeech with varying intents (e.g., denouncing or empathy) has significant potential to mitigate hate speech effectively. Recently, controlled approaches based on large language models (LLMs) have been explored to generate intent-specific counterspeech. Due to the lack of attention to intent-specific information by LLMs during the decoding process, those methods cater more to the semantic information rather than matching with the desired intents. Further, there are still limitations in quantitatively evaluating the effectiveness of counterspeech with different intents in mitigating hate speech. In this paper, to address the above issues, we propose DART, an LLMs-based DuAl-discRiminaTor guided framework for counterspeech generation. We employ an intent-aware discriminator and hate-mitigating discriminator to jointly guide the decoding preferences of LLMs, which facilitates the model towards generating counterspeech catering to specific intent and hate mitigation. We apply a maximum-margin relative objective for training discriminators. This objective leverages the distance between counterspeech aligned with the desired target (such as specific intent or effectiveness in hate mitigation) and undesired as an effective learning signal. Extensive experiments show that DART achieves excellent performances in matching the desired intent and mitigating hate.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2024-intent">
<titleInfo>
<title>Intent-Aware and Hate-Mitigating Counterspeech Generation via Dual-Discriminator Guided LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haiyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiliang</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuchen</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongkui</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minlie</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Counterspeech is an effective way to combat online hate speech. Considering the multifaceted nature of online hate speech, counterspeech with varying intents (e.g., denouncing or empathy) has significant potential to mitigate hate speech effectively. Recently, controlled approaches based on large language models (LLMs) have been explored to generate intent-specific counterspeech. Due to the lack of attention to intent-specific information by LLMs during the decoding process, those methods cater more to the semantic information rather than matching with the desired intents. Further, there are still limitations in quantitatively evaluating the effectiveness of counterspeech with different intents in mitigating hate speech. In this paper, to address the above issues, we propose DART, an LLMs-based DuAl-discRiminaTor guided framework for counterspeech generation. We employ an intent-aware discriminator and hate-mitigating discriminator to jointly guide the decoding preferences of LLMs, which facilitates the model towards generating counterspeech catering to specific intent and hate mitigation. We apply a maximum-margin relative objective for training discriminators. This objective leverages the distance between counterspeech aligned with the desired target (such as specific intent or effectiveness in hate mitigation) and undesired as an effective learning signal. Extensive experiments show that DART achieves excellent performances in matching the desired intent and mitigating hate.</abstract>
<identifier type="citekey">wang-etal-2024-intent</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.800/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>9131</start>
<end>9142</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Intent-Aware and Hate-Mitigating Counterspeech Generation via Dual-Discriminator Guided LLMs
%A Wang, Haiyang
%A Tian, Zhiliang
%A Song, Xin
%A Zhang, Yue
%A Pan, Yuchen
%A Tu, Hongkui
%A Huang, Minlie
%A Zhou, Bin
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F wang-etal-2024-intent
%X Counterspeech is an effective way to combat online hate speech. Considering the multifaceted nature of online hate speech, counterspeech with varying intents (e.g., denouncing or empathy) has significant potential to mitigate hate speech effectively. Recently, controlled approaches based on large language models (LLMs) have been explored to generate intent-specific counterspeech. Due to the lack of attention to intent-specific information by LLMs during the decoding process, those methods cater more to the semantic information rather than matching with the desired intents. Further, there are still limitations in quantitatively evaluating the effectiveness of counterspeech with different intents in mitigating hate speech. In this paper, to address the above issues, we propose DART, an LLMs-based DuAl-discRiminaTor guided framework for counterspeech generation. We employ an intent-aware discriminator and hate-mitigating discriminator to jointly guide the decoding preferences of LLMs, which facilitates the model towards generating counterspeech catering to specific intent and hate mitigation. We apply a maximum-margin relative objective for training discriminators. This objective leverages the distance between counterspeech aligned with the desired target (such as specific intent or effectiveness in hate mitigation) and undesired as an effective learning signal. Extensive experiments show that DART achieves excellent performances in matching the desired intent and mitigating hate.
%U https://aclanthology.org/2024.lrec-main.800/
%P 9131-9142
Markdown (Informal)
[Intent-Aware and Hate-Mitigating Counterspeech Generation via Dual-Discriminator Guided LLMs](https://aclanthology.org/2024.lrec-main.800/) (Wang et al., LREC-COLING 2024)
ACL
- Haiyang Wang, Zhiliang Tian, Xin Song, Yue Zhang, Yuchen Pan, Hongkui Tu, Minlie Huang, and Bin Zhou. 2024. Intent-Aware and Hate-Mitigating Counterspeech Generation via Dual-Discriminator Guided LLMs. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 9131–9142, Torino, Italia. ELRA and ICCL.