% EMNLP 2024 main-conference paper. The coined name CareCorpus+ is braced as a
% whole word so bibliography styles that sentence-case titles keep its capitals.
@inproceedings{farzana-etal-2024-carecorpus,
    author    = {Farzana, Shahla and
                 Lucero, Ivana and
                 Villegas, Vivian and
                 Kaelin, Vera C and
                 Khetani, Mary and
                 Parde, Natalie},
    title     = {{CareCorpus+}: Expanding and Augmenting Caregiver Strategy Data to Support Pediatric Rehabilitation},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    pages     = {6912--6927},
    doi       = {10.18653/v1/2024.emnlp-main.392},
    url       = {https://aclanthology.org/2024.emnlp-main.392/},
    abstract  = {Caregiver strategy classification in pediatric rehabilitation contexts is strongly motivated by real-world clinical constraints but highly under-resourced and seldom studied in natural language processing settings. We introduce a large dataset of 4,037 caregiver strategies in this setting, a five-fold increase over the nearest contemporary dataset. These strategies are manually categorized into clinically established constructs with high agreement ($\kappa$=0.68-0.89). We also propose two techniques to further address identified data constraints. First, we manually supplement target task data with publicly relevant data from online child health forums. Next, we propose a novel data augmentation technique to generate synthetic caregiver strategies with high downstream task utility. Extensive experiments showcase the quality of our dataset. They also establish evidence that both the publicly available data and the synthetic strategies result in large performance gains, with relative F$_1$ increases of 22.6{\%} and 50.9{\%}, respectively.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="farzana-etal-2024-carecorpus">
<titleInfo>
<title>CareCorpus+: Expanding and Augmenting Caregiver Strategy Data to Support Pediatric Rehabilitation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shahla</namePart>
<namePart type="family">Farzana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivana</namePart>
<namePart type="family">Lucero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivian</namePart>
<namePart type="family">Villegas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Kaelin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Khetani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Parde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Caregiver strategy classification in pediatric rehabilitation contexts is strongly motivated by real-world clinical constraints but highly under-resourced and seldom studied in natural language processing settings. We introduce a large dataset of 4,037 caregiver strategies in this setting, a five-fold increase over the nearest contemporary dataset. These strategies are manually categorized into clinically established constructs with high agreement (κ=0.68-0.89). We also propose two techniques to further address identified data constraints. First, we manually supplement target task data with publicly relevant data from online child health forums. Next, we propose a novel data augmentation technique to generate synthetic caregiver strategies with high downstream task utility. Extensive experiments showcase the quality of our dataset. They also establish evidence that both the publicly available data and the synthetic strategies result in large performance gains, with relative F₁ increases of 22.6% and 50.9%, respectively.</abstract>
<identifier type="citekey">farzana-etal-2024-carecorpus</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.392</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.392/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>6912</start>
<end>6927</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CareCorpus+: Expanding and Augmenting Caregiver Strategy Data to Support Pediatric Rehabilitation
%A Farzana, Shahla
%A Lucero, Ivana
%A Villegas, Vivian
%A Kaelin, Vera C.
%A Khetani, Mary
%A Parde, Natalie
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F farzana-etal-2024-carecorpus
%X Caregiver strategy classification in pediatric rehabilitation contexts is strongly motivated by real-world clinical constraints but highly under-resourced and seldom studied in natural language processing settings. We introduce a large dataset of 4,037 caregiver strategies in this setting, a five-fold increase over the nearest contemporary dataset. These strategies are manually categorized into clinically established constructs with high agreement (κ=0.68-0.89). We also propose two techniques to further address identified data constraints. First, we manually supplement target task data with publicly relevant data from online child health forums. Next, we propose a novel data augmentation technique to generate synthetic caregiver strategies with high downstream task utility. Extensive experiments showcase the quality of our dataset. They also establish evidence that both the publicly available data and the synthetic strategies result in large performance gains, with relative F₁ increases of 22.6% and 50.9%, respectively.
%R 10.18653/v1/2024.emnlp-main.392
%U https://aclanthology.org/2024.emnlp-main.392/
%U https://doi.org/10.18653/v1/2024.emnlp-main.392
%P 6912-6927
Markdown (Informal)
[CareCorpus+: Expanding and Augmenting Caregiver Strategy Data to Support Pediatric Rehabilitation](https://aclanthology.org/2024.emnlp-main.392/) (Farzana et al., EMNLP 2024)
ACL