@inproceedings{tomita-etal-2024-reforging,
title = "Reforging : A Method for Constructing a Linguistically Valid {J}apanese {CCG} Treebank",
author = "Tomita, Asa and
Yanaka, Hitomi and
Bekki, Daisuke",
editor = "Falk, Neele and
Papi, Sara and
Zhang, Mike",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-srw.14",
pages = "196--207",
abstract = "The linguistic validity of Combinatory Categorial Grammar (CCG) parsing results relies heavily on treebanks for training and evaluation, so the treebank construction is crucial. Yet the current Japanese CCG treebank is known to have inaccuracies in its analyses of Japanese syntactic structures, including passive and causative constructions. While ABCTreebank, a treebank for ABC grammar, has been made to improve the analysis, particularly of argument structures, it lacks the detailed syntactic features required for Japanese CCG. In contrast, the Japanese CCG parser, lightblue, efficiently provides detailed syntactic features, but it does not accurately capture argument structures. We propose a method to generate a linguistically valid Japanese CCG treebank with detailed information by combining the strengths of ABCTreebank and lightblue. We develop an algorithm that filters lightblue{'}s lexical items using ABCTreebank, effectively converting lightblue output into a linguistically valid CCG treebank. To evaluate our treebank, we manually evaluate CCG syntactic structures and semantic representations and analyze conversion rates.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tomita-etal-2024-reforging">
<titleInfo>
<title>Reforging : A Method for Constructing a Linguistically Valid Japanese CCG Treebank</title>
</titleInfo>
<name type="personal">
<namePart type="given">Asa</namePart>
<namePart type="family">Tomita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitomi</namePart>
<namePart type="family">Yanaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daisuke</namePart>
<namePart type="family">Bekki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Neele</namePart>
<namePart type="family">Falk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Papi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The linguistic validity of Combinatory Categorial Grammar (CCG) parsing results relies heavily on treebanks for training and evaluation, so the treebank construction is crucial. Yet the current Japanese CCG treebank is known to have inaccuracies in its analyses of Japanese syntactic structures, including passive and causative constructions. While ABCTreebank, a treebank for ABC grammar, has been made to improve the analysis, particularly of argument structures, it lacks the detailed syntactic features required for Japanese CCG. In contrast, the Japanese CCG parser, lightblue, efficiently provides detailed syntactic features, but it does not accurately capture argument structures. We propose a method to generate a linguistically valid Japanese CCG treebank with detailed information by combining the strengths of ABCTreebank and lightblue. We develop an algorithm that filters lightblue’s lexical items using ABCTreebank, effectively converting lightblue output into a linguistically valid CCG treebank. To evaluate our treebank, we manually evaluate CCG syntactic structures and semantic representations and analyze conversion rates.</abstract>
<identifier type="citekey">tomita-etal-2024-reforging</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-srw.14</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>196</start>
<end>207</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reforging : A Method for Constructing a Linguistically Valid Japanese CCG Treebank
%A Tomita, Asa
%A Yanaka, Hitomi
%A Bekki, Daisuke
%Y Falk, Neele
%Y Papi, Sara
%Y Zhang, Mike
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F tomita-etal-2024-reforging
%X The linguistic validity of Combinatory Categorial Grammar (CCG) parsing results relies heavily on treebanks for training and evaluation, so the treebank construction is crucial. Yet the current Japanese CCG treebank is known to have inaccuracies in its analyses of Japanese syntactic structures, including passive and causative constructions. While ABCTreebank, a treebank for ABC grammar, has been made to improve the analysis, particularly of argument structures, it lacks the detailed syntactic features required for Japanese CCG. In contrast, the Japanese CCG parser, lightblue, efficiently provides detailed syntactic features, but it does not accurately capture argument structures. We propose a method to generate a linguistically valid Japanese CCG treebank with detailed information by combining the strengths of ABCTreebank and lightblue. We develop an algorithm that filters lightblue’s lexical items using ABCTreebank, effectively converting lightblue output into a linguistically valid CCG treebank. To evaluate our treebank, we manually evaluate CCG syntactic structures and semantic representations and analyze conversion rates.
%U https://aclanthology.org/2024.eacl-srw.14
%P 196-207
Markdown (Informal)
[Reforging : A Method for Constructing a Linguistically Valid Japanese CCG Treebank](https://aclanthology.org/2024.eacl-srw.14) (Tomita et al., EACL 2024)
ACL