% Conference paper from the NAACL 2024 Student Research Workshop volume
% (ACL Anthology id 2024.naacl-srw.18, per the url and doi fields below).
% Title: the whole word "Japanese" is brace-protected so sentence-casing
% styles keep its capital without splitting the word at a brace boundary.
% NOTE(review): address looks like the conference venue rather than the
% publisher's city; kept as-is to avoid losing data. Confirm against the
% target style before moving it to a venue/location field.
@inproceedings{enomoto-etal-2024-investigating,
  author    = {Enomoto, Rintaro and Tolmachev, Arseny and Niitsuma, Takuro and Kurita, Shuhei and Kawahara, Daisuke},
  title     = {Investigating Web Corpus Filtering Methods for Language Model Development in {Japanese}},
  editor    = {Cao, Yang (Trista) and Papadimitriou, Isabel and Ovalle, Anaelia and Zampieri, Marcos and Ferraro, Francis and Swayamdipta, Swabha},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {154--160},
  doi       = {10.18653/v1/2024.naacl-srw.18},
  url       = {https://aclanthology.org/2024.naacl-srw.18/},
}