% Conference paper in Findings of ACL: EMNLP 2020 (ACL Anthology export).
% Compound surnames that contain a lowercase particle after a capitalised
% word are wrapped in braces so BibTeX keeps them as one family name instead
% of splitting them into a "von" part and a shorter last name.
% Title casing is protected on whole words rather than on single letters.
% The abstract is reproduced verbatim from the published record.
@inproceedings{lima-etal-2020-inferring,
  title     = {Inferring about fraudulent collusion risk on {Brazilian} public works contracts in official texts using a {Bi-LSTM} approach},
  author    = {Lima, Marcos and
               Silva, Roberta and
               {Lopes de Souza Mendes}, Felipe and
               {R. de Carvalho}, Leonardo and
               Araujo, Aleteia and
               de Barros Vidal, Flavio},
  editor    = {Cohn, Trevor and
               He, Yulan and
               Liu, Yang},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.findings-emnlp.143/},
  doi       = {10.18653/v1/2020.findings-emnlp.143},
  pages     = {1580--1588},
  abstract  = {Public works procurements move US{\$} 10 billion yearly in Brazil and are a preferred field for collusion and fraud. Federal Police and audit agencies investigate collusion (bid-rigging), over-pricing, and delivery fraud in this field and efforts have been employed to early detect fraud and collusion on public works procurements. The current automatic methods of fraud detection use structured data to classification and usually do not involve annotated data. The use of NLP for this kind of application is rare. Our work introduces a new dataset formed by public procurement calls available on Brazilian official journal (Di{\'a}rio Oficial da Uni{\~a}o), using by 15,132,968 textual entries of which 1,907 are annotated risky entries. Both bottleneck deep neural network and BiLSTM shown competitive compared with classical classifiers and achieved better precision (93.0{\%} and 92.4{\%}, respectively), which signs improvements in a criminal fraud investigation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- Single MODS record; the ID attribute equals the citekey identifier given further down. -->
<mods ID="lima-etal-2020-inferring">
<titleInfo>
<title>Inferring about fraudulent collusion risk on Brazilian public works contracts in official texts using a Bi-LSTM approach</title>
</titleInfo>
<!-- Six personal names with the role "author", each split into given and family parts. -->
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Lima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberta</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felipe</namePart>
<namePart type="family">Lopes de Souza Mendes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">R. de Carvalho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleteia</namePart>
<namePart type="family">Araujo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flavio</namePart>
<namePart type="family">de Barros Vidal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, carrying its three editors, the publisher, the place and the genre. -->
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Public works procurements move US$ 10 billion yearly in Brazil and are a preferred field for collusion and fraud. Federal Police and audit agencies investigate collusion (bid-rigging), over-pricing, and delivery fraud in this field and efforts have been employed to early detect fraud and collusion on public works procurements. The current automatic methods of fraud detection use structured data to classification and usually do not involve annotated data. The use of NLP for this kind of application is rare. Our work introduces a new dataset formed by public procurement calls available on Brazilian official journal (Diário Oficial da União), using by 15,132,968 textual entries of which 1,907 are annotated risky entries. Both bottleneck deep neural network and BiLSTM shown competitive compared with classical classifiers and achieved better precision (93.0% and 92.4%, respectively), which signs improvements in a criminal fraud investigation.</abstract>
<!-- Identifiers: the citation key and the bare DOI (no resolver prefix); the landing-page URL follows under location. -->
<identifier type="citekey">lima-etal-2020-inferring</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.143</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.143/</url>
</location>
<!-- Part details: issue date and the page extent, pages 1580 to 1588. -->
<part>
<date>2020-11</date>
<extent unit="page">
<start>1580</start>
<end>1588</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Inferring about fraudulent collusion risk on Brazilian public works contracts in official texts using a Bi-LSTM approach
%A Lima, Marcos
%A Silva, Roberta
%A Lopes de Souza Mendes, Felipe
%A R. de Carvalho, Leonardo
%A Araujo, Aleteia
%A de Barros Vidal, Flavio
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F lima-etal-2020-inferring
%X Public works procurements move US$ 10 billion yearly in Brazil and are a preferred field for collusion and fraud. Federal Police and audit agencies investigate collusion (bid-rigging), over-pricing, and delivery fraud in this field and efforts have been employed to early detect fraud and collusion on public works procurements. The current automatic methods of fraud detection use structured data to classification and usually do not involve annotated data. The use of NLP for this kind of application is rare. Our work introduces a new dataset formed by public procurement calls available on Brazilian official journal (Diário Oficial da União), using by 15,132,968 textual entries of which 1,907 are annotated risky entries. Both bottleneck deep neural network and BiLSTM shown competitive compared with classical classifiers and achieved better precision (93.0% and 92.4%, respectively), which signs improvements in a criminal fraud investigation.
%R 10.18653/v1/2020.findings-emnlp.143
%U https://aclanthology.org/2020.findings-emnlp.143/
%U https://doi.org/10.18653/v1/2020.findings-emnlp.143
%P 1580-1588
Markdown (Informal)
[Inferring about fraudulent collusion risk on Brazilian public works contracts in official texts using a Bi-LSTM approach](https://aclanthology.org/2020.findings-emnlp.143/) (Lima et al., Findings 2020)
ACL