@inproceedings{tang-clematide-2021-searching,
title = "Searching for Legal Documents at Paragraph Level: Automating Label Generation and Use of an Extended Attention Mask for Boosting Neural Models of Semantic Similarity",
author = "Tang, Li and
Clematide, Simon",
editor = "Aletras, Nikolaos and
Androutsopoulos, Ion and
Barrett, Leslie and
Goanta, Catalina and
Preotiuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nllp-1.12/",
doi = "10.18653/v1/2021.nllp-1.12",
pages = "114--122",
abstract = "Searching for legal documents is a specialized Information Retrieval task that is relevant for expert users (lawyers and their assistants) and for non-expert users. By searching previous court decisions (cases), a user can better prepare the legal reasoning of a new case. Being able to search using a natural language text snippet instead of a more artificial query could help to prevent query formulation issues. Also, if semantic similarity could be modeled beyond exact lexical matches, more relevant results can be found even if the query terms don`t match exactly. For this domain, we formulated a task to compare different ways of modeling semantic similarity at paragraph level, using neural and non-neural systems. We compared systems that encode the query and the search collection paragraphs as vectors, enabling the use of cosine similarity for results ranking. After building a German dataset for cases and statutes from Switzerland, and extracting citations from cases to statutes, we developed an algorithm for estimating semantic similarity at paragraph level, using a link-based similarity method. When evaluating different systems in this way, we find that semantic similarity modeling by neural systems can be boosted with an extended attention mask that quenches noise in the inputs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tang-clematide-2021-searching">
<titleInfo>
<title>Searching for Legal Documents at Paragraph Level: Automating Label Generation and Use of an Extended Attention Mask for Boosting Neural Models of Semantic Similarity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Clematide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ion</namePart>
<namePart type="family">Androutsopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catalina</namePart>
<namePart type="family">Goanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preotiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Searching for legal documents is a specialized Information Retrieval task that is relevant for expert users (lawyers and their assistants) and for non-expert users. By searching previous court decisions (cases), a user can better prepare the legal reasoning of a new case. Being able to search using a natural language text snippet instead of a more artificial query could help to prevent query formulation issues. Also, if semantic similarity could be modeled beyond exact lexical matches, more relevant results can be found even if the query terms don‘t match exactly. For this domain, we formulated a task to compare different ways of modeling semantic similarity at paragraph level, using neural and non-neural systems. We compared systems that encode the query and the search collection paragraphs as vectors, enabling the use of cosine similarity for results ranking. After building a German dataset for cases and statutes from Switzerland, and extracting citations from cases to statutes, we developed an algorithm for estimating semantic similarity at paragraph level, using a link-based similarity method. When evaluating different systems in this way, we find that semantic similarity modeling by neural systems can be boosted with an extended attention mask that quenches noise in the inputs.</abstract>
<identifier type="citekey">tang-clematide-2021-searching</identifier>
<identifier type="doi">10.18653/v1/2021.nllp-1.12</identifier>
<location>
<url>https://aclanthology.org/2021.nllp-1.12/</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>114</start>
<end>122</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Searching for Legal Documents at Paragraph Level: Automating Label Generation and Use of an Extended Attention Mask for Boosting Neural Models of Semantic Similarity
%A Tang, Li
%A Clematide, Simon
%Y Aletras, Nikolaos
%Y Androutsopoulos, Ion
%Y Barrett, Leslie
%Y Goanta, Catalina
%Y Preotiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F tang-clematide-2021-searching
%X Searching for legal documents is a specialized Information Retrieval task that is relevant for expert users (lawyers and their assistants) and for non-expert users. By searching previous court decisions (cases), a user can better prepare the legal reasoning of a new case. Being able to search using a natural language text snippet instead of a more artificial query could help to prevent query formulation issues. Also, if semantic similarity could be modeled beyond exact lexical matches, more relevant results can be found even if the query terms don‘t match exactly. For this domain, we formulated a task to compare different ways of modeling semantic similarity at paragraph level, using neural and non-neural systems. We compared systems that encode the query and the search collection paragraphs as vectors, enabling the use of cosine similarity for results ranking. After building a German dataset for cases and statutes from Switzerland, and extracting citations from cases to statutes, we developed an algorithm for estimating semantic similarity at paragraph level, using a link-based similarity method. When evaluating different systems in this way, we find that semantic similarity modeling by neural systems can be boosted with an extended attention mask that quenches noise in the inputs.
%R 10.18653/v1/2021.nllp-1.12
%U https://aclanthology.org/2021.nllp-1.12/
%U https://doi.org/10.18653/v1/2021.nllp-1.12
%P 114-122
Markdown (Informal)
[Searching for Legal Documents at Paragraph Level: Automating Label Generation and Use of an Extended Attention Mask for Boosting Neural Models of Semantic Similarity](https://aclanthology.org/2021.nllp-1.12/) (Tang & Clematide, NLLP 2021)
ACL