@inproceedings{kuznetsov-etal-2024-robust,
title = "Robust {AI}-Generated Text Detection by Restricted Embeddings",
author = "Kuznetsov, Kristian and
Tulchinskii, Eduard and
Kushnareva, Laida and
Magai, German and
Barannikov, Serguei and
Nikolenko, Sergey and
Piontkovskaya, Irina",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.992/",
doi = "10.18653/v1/2024.findings-emnlp.992",
pages = "17036--17055",
abstract = "Growing amount and quality of AI-generated texts makes detecting such content more difficult. In most real-world scenarios, the domain (style and topic) of generated data and the generator model are not known in advance. In this work, we focus on the robustness of classifier-based detectors of AI-generated text, namely their ability to transfer to unseen generators or semantic domains. We investigate the geometry of the embedding space of Transformer-based text encoders and show that clearing out harmful linear subspaces helps to train a robust classifier, ignoring domain-specific spurious features. We investigate several subspace decomposition and feature selection strategies and achieve significant improvements over state of the art methods in cross-domain and cross-generator transfer. Our best approaches for head-wise and coordinate-based subspace removal increase the mean out-of-distribution (OOD) classification score by up to 9{\%} and 14{\%} in particular setups for RoBERTa and BERT embeddings respectively. We release our code and data: https://github.com/SilverSolver/RobustATD"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kuznetsov-etal-2024-robust">
<titleInfo>
<title>Robust AI-Generated Text Detection by Restricted Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristian</namePart>
<namePart type="family">Kuznetsov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Tulchinskii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laida</namePart>
<namePart type="family">Kushnareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">German</namePart>
<namePart type="family">Magai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serguei</namePart>
<namePart type="family">Barannikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergey</namePart>
<namePart type="family">Nikolenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Piontkovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Growing amount and quality of AI-generated texts makes detecting such content more difficult. In most real-world scenarios, the domain (style and topic) of generated data and the generator model are not known in advance. In this work, we focus on the robustness of classifier-based detectors of AI-generated text, namely their ability to transfer to unseen generators or semantic domains. We investigate the geometry of the embedding space of Transformer-based text encoders and show that clearing out harmful linear subspaces helps to train a robust classifier, ignoring domain-specific spurious features. We investigate several subspace decomposition and feature selection strategies and achieve significant improvements over state of the art methods in cross-domain and cross-generator transfer. Our best approaches for head-wise and coordinate-based subspace removal increase the mean out-of-distribution (OOD) classification score by up to 9% and 14% in particular setups for RoBERTa and BERT embeddings respectively. We release our code and data: https://github.com/SilverSolver/RobustATD</abstract>
<identifier type="citekey">kuznetsov-etal-2024-robust</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.992</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.992/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>17036</start>
<end>17055</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Robust AI-Generated Text Detection by Restricted Embeddings
%A Kuznetsov, Kristian
%A Tulchinskii, Eduard
%A Kushnareva, Laida
%A Magai, German
%A Barannikov, Serguei
%A Nikolenko, Sergey
%A Piontkovskaya, Irina
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F kuznetsov-etal-2024-robust
%X Growing amount and quality of AI-generated texts makes detecting such content more difficult. In most real-world scenarios, the domain (style and topic) of generated data and the generator model are not known in advance. In this work, we focus on the robustness of classifier-based detectors of AI-generated text, namely their ability to transfer to unseen generators or semantic domains. We investigate the geometry of the embedding space of Transformer-based text encoders and show that clearing out harmful linear subspaces helps to train a robust classifier, ignoring domain-specific spurious features. We investigate several subspace decomposition and feature selection strategies and achieve significant improvements over state of the art methods in cross-domain and cross-generator transfer. Our best approaches for head-wise and coordinate-based subspace removal increase the mean out-of-distribution (OOD) classification score by up to 9% and 14% in particular setups for RoBERTa and BERT embeddings respectively. We release our code and data: https://github.com/SilverSolver/RobustATD
%R 10.18653/v1/2024.findings-emnlp.992
%U https://aclanthology.org/2024.findings-emnlp.992/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.992
%P 17036-17055
Markdown (Informal)
[Robust AI-Generated Text Detection by Restricted Embeddings](https://aclanthology.org/2024.findings-emnlp.992/) (Kuznetsov et al., Findings 2024)
ACL
- Kristian Kuznetsov, Eduard Tulchinskii, Laida Kushnareva, German Magai, Serguei Barannikov, Sergey Nikolenko, and Irina Piontkovskaya. 2024. Robust AI-Generated Text Detection by Restricted Embeddings. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 17036–17055, Miami, Florida, USA. Association for Computational Linguistics.