@inproceedings{lee-chieu-2021-co,
title = "Co-training for Commit Classification",
author = "Lee, Jian Yi David and
Chieu, Hai Leong",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wnut-1.43",
doi = "10.18653/v1/2021.wnut-1.43",
pages = "389--395",
abstract = "Commits in version control systems (e.g. Git) track changes in a software project. Commits comprise noisy user-generated natural language and code patches. Automatic commit classification (CC) has been used to determine the type of code maintenance activities performed, as well as to detect bug fixes in code repositories. Much prior work occurs in the fully-supervised setting {--} a setting that can be a stretch in resource-scarce situations presenting difficulties in labeling commits. In this paper, we apply co-training, a semi-supervised learning method, to take advantage of the two views available {--} the commit message (natural language) and the code changes (programming language) {--} to improve commit classification.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-chieu-2021-co">
<titleInfo>
<title>Co-training for Commit Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jian</namePart>
<namePart type="given">Yi</namePart>
<namePart type="given">David</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hai</namePart>
<namePart type="given">Leong</namePart>
<namePart type="family">Chieu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Commits in version control systems (e.g. Git) track changes in a software project. Commits comprise noisy user-generated natural language and code patches. Automatic commit classification (CC) has been used to determine the type of code maintenance activities performed, as well as to detect bug fixes in code repositories. Much prior work occurs in the fully-supervised setting – a setting that can be a stretch in resource-scarce situations presenting difficulties in labeling commits. In this paper, we apply co-training, a semi-supervised learning method, to take advantage of the two views available – the commit message (natural language) and the code changes (programming language) – to improve commit classification.</abstract>
<identifier type="citekey">lee-chieu-2021-co</identifier>
<identifier type="doi">10.18653/v1/2021.wnut-1.43</identifier>
<location>
<url>https://aclanthology.org/2021.wnut-1.43</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>389</start>
<end>395</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Co-training for Commit Classification
%A Lee, Jian Yi David
%A Chieu, Hai Leong
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F lee-chieu-2021-co
%X Commits in version control systems (e.g. Git) track changes in a software project. Commits comprise noisy user-generated natural language and code patches. Automatic commit classification (CC) has been used to determine the type of code maintenance activities performed, as well as to detect bug fixes in code repositories. Much prior work occurs in the fully-supervised setting – a setting that can be a stretch in resource-scarce situations presenting difficulties in labeling commits. In this paper, we apply co-training, a semi-supervised learning method, to take advantage of the two views available – the commit message (natural language) and the code changes (programming language) – to improve commit classification.
%R 10.18653/v1/2021.wnut-1.43
%U https://aclanthology.org/2021.wnut-1.43
%U https://doi.org/10.18653/v1/2021.wnut-1.43
%P 389-395
Markdown (Informal)
[Co-training for Commit Classification](https://aclanthology.org/2021.wnut-1.43) (Lee & Chieu, WNUT 2021)
ACL
- Jian Yi David Lee and Hai Leong Chieu. 2021. Co-training for Commit Classification. In Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021), pages 389–395, Online. Association for Computational Linguistics.