@inproceedings{pieuchon-etal-2024-large,
title = "Can Large Language Models (or Humans) Disentangle Text?",
author = "Audinet de Pieuchon, Nicolas and
Daoud, Adel and
Jerzak, Connor and
Johansson, Moa and
Johansson, Richard",
editor = "Card, Dallas and
Field, Anjalie and
Hovy, Dirk and
Keith, Katherine",
booktitle = "Proceedings of the Sixth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlpcss-1.5",
doi = "10.18653/v1/2024.nlpcss-1.5",
pages = "57--67",
abstract = "We investigate the potential of large language models (LLMs) to disentangle text variables{---}to remove the textual traces of an undesired forbidden variable in a task sometimes known as text distillation and closely related to the fairness in AI and causal inference literature. We employ a range of various LLM approaches in an attempt to disentangle text by identifying and removing information about a target variable while preserving other relevant signals. We show that in the strong test of removing sentiment, the statistical association between the processed text and sentiment is still detectable to machine learning classifiers post-LLM-disentanglement. Furthermore, we find that human annotators also struggle to disentangle sentiment while preserving other semantic content. This suggests there may be limited separability between concept variables in some text contexts, highlighting limitations of methods relying on text-level transformations and also raising questions about the robustness of disentanglement methods that achieve statistical independence in representation space.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pieuchon-etal-2024-large">
<titleInfo>
<title>Can Large Language Models (or Humans) Disentangle Text?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicolas</namePart>
<namePart type="family">Audinet de Pieuchon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adel</namePart>
<namePart type="family">Daoud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Connor</namePart>
<namePart type="family">Jerzak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moa</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dallas</namePart>
<namePart type="family">Card</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anjalie</namePart>
<namePart type="family">Field</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the potential of large language models (LLMs) to disentangle text variables—to remove the textual traces of an undesired forbidden variable in a task sometimes known as text distillation and closely related to the fairness in AI and causal inference literature. We employ a range of various LLM approaches in an attempt to disentangle text by identifying and removing information about a target variable while preserving other relevant signals. We show that in the strong test of removing sentiment, the statistical association between the processed text and sentiment is still detectable to machine learning classifiers post-LLM-disentanglement. Furthermore, we find that human annotators also struggle to disentangle sentiment while preserving other semantic content. This suggests there may be limited separability between concept variables in some text contexts, highlighting limitations of methods relying on text-level transformations and also raising questions about the robustness of disentanglement methods that achieve statistical independence in representation space.</abstract>
<identifier type="citekey">pieuchon-etal-2024-large</identifier>
<identifier type="doi">10.18653/v1/2024.nlpcss-1.5</identifier>
<location>
<url>https://aclanthology.org/2024.nlpcss-1.5</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>57</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Large Language Models (or Humans) Disentangle Text?
%A Audinet de Pieuchon, Nicolas
%A Daoud, Adel
%A Jerzak, Connor
%A Johansson, Moa
%A Johansson, Richard
%Y Card, Dallas
%Y Field, Anjalie
%Y Hovy, Dirk
%Y Keith, Katherine
%S Proceedings of the Sixth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F pieuchon-etal-2024-large
%X We investigate the potential of large language models (LLMs) to disentangle text variables—to remove the textual traces of an undesired forbidden variable in a task sometimes known as text distillation and closely related to the fairness in AI and causal inference literature. We employ a range of various LLM approaches in an attempt to disentangle text by identifying and removing information about a target variable while preserving other relevant signals. We show that in the strong test of removing sentiment, the statistical association between the processed text and sentiment is still detectable to machine learning classifiers post-LLM-disentanglement. Furthermore, we find that human annotators also struggle to disentangle sentiment while preserving other semantic content. This suggests there may be limited separability between concept variables in some text contexts, highlighting limitations of methods relying on text-level transformations and also raising questions about the robustness of disentanglement methods that achieve statistical independence in representation space.
%R 10.18653/v1/2024.nlpcss-1.5
%U https://aclanthology.org/2024.nlpcss-1.5
%U https://doi.org/10.18653/v1/2024.nlpcss-1.5
%P 57-67
Markdown (Informal)
[Can Large Language Models (or Humans) Disentangle Text?](https://aclanthology.org/2024.nlpcss-1.5) (Audinet de Pieuchon et al., NLP+CSS-WS 2024)
ACL
- Nicolas Audinet de Pieuchon, Adel Daoud, Connor Jerzak, Moa Johansson, and Richard Johansson. 2024. Can Large Language Models (or Humans) Disentangle Text? In Proceedings of the Sixth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS 2024), pages 57–67, Mexico City, Mexico. Association for Computational Linguistics.