@inproceedings{gartner-2020-corpus,
title = "The Corpus Query Middleware of Tomorrow {--} A Proposal for a Hybrid Corpus Query Architecture",
author = {G{\"a}rtner, Markus},
editor = {Ba{\'n}ski, Piotr and
Barbaresi, Adrien and
Clematide, Simon and
Kupietz, Marc and
L{\"u}ngen, Harald and
Pisetta, Ines},
booktitle = "Proceedings of the 8th Workshop on Challenges in the Management of Large Corpora",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.cmlc-1.5/",
pages = "31--39",
language = "eng",
ISBN = "979-10-95546-61-0",
abstract = "Development of dozens of specialized corpus query systems and languages over the past decades has led to a diverse but also fragmented landscape. Today we are faced with a plethora of query tools that each provide unique features, but which are also not interoperable and often rely on very specific database back-ends or formats for storage. This severely hampers usability both for end users that want to query different corpora and also for corpus designers that wish to provide users with an interface for querying and exploration. We propose a hybrid corpus query architecture as a first step to overcoming this issue. It takes the form of a middleware system between user front-ends and optional database or text indexing solutions as back-ends. At its core is a custom query evaluation engine for index-less processing of corpus queries. With a flexible JSON-LD query protocol the approach allows communication with back-end systems to partially solve queries and offset some of the performance penalties imposed by the custom evaluation engine. This paper outlines the details of our first draft of the aforementioned architecture."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gartner-2020-corpus">
<titleInfo>
<title>The Corpus Query Middleware of Tomorrow – A Proposal for a Hybrid Corpus Query Architecture</title>
</titleInfo>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Gärtner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Challenges in the Management of Large Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Bański</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrien</namePart>
<namePart type="family">Barbaresi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Clematide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Kupietz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Lüngen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ines</namePart>
<namePart type="family">Pisetta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-61-0</identifier>
</relatedItem>
<abstract>Development of dozens of specialized corpus query systems and languages over the past decades has led to a diverse but also fragmented landscape. Today we are faced with a plethora of query tools that each provide unique features, but which are also not interoperable and often rely on very specific database back-ends or formats for storage. This severely hampers usability both for end users that want to query different corpora and also for corpus designers that wish to provide users with an interface for querying and exploration. We propose a hybrid corpus query architecture as a first step to overcoming this issue. It takes the form of a middleware system between user front-ends and optional database or text indexing solutions as back-ends. At its core is a custom query evaluation engine for index-less processing of corpus queries. With a flexible JSON-LD query protocol the approach allows communication with back-end systems to partially solve queries and offset some of the performance penalties imposed by the custom evaluation engine. This paper outlines the details of our first draft of the aforementioned architecture.</abstract>
<identifier type="citekey">gartner-2020-corpus</identifier>
<location>
<url>https://aclanthology.org/2020.cmlc-1.5/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>31</start>
<end>39</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Corpus Query Middleware of Tomorrow – A Proposal for a Hybrid Corpus Query Architecture
%A Gärtner, Markus
%Y Bański, Piotr
%Y Barbaresi, Adrien
%Y Clematide, Simon
%Y Kupietz, Marc
%Y Lüngen, Harald
%Y Pisetta, Ines
%S Proceedings of the 8th Workshop on Challenges in the Management of Large Corpora
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-61-0
%G eng
%F gartner-2020-corpus
%X Development of dozens of specialized corpus query systems and languages over the past decades has led to a diverse but also fragmented landscape. Today we are faced with a plethora of query tools that each provide unique features, but which are also not interoperable and often rely on very specific database back-ends or formats for storage. This severely hampers usability both for end users that want to query different corpora and also for corpus designers that wish to provide users with an interface for querying and exploration. We propose a hybrid corpus query architecture as a first step to overcoming this issue. It takes the form of a middleware system between user front-ends and optional database or text indexing solutions as back-ends. At its core is a custom query evaluation engine for index-less processing of corpus queries. With a flexible JSON-LD query protocol the approach allows communication with back-end systems to partially solve queries and offset some of the performance penalties imposed by the custom evaluation engine. This paper outlines the details of our first draft of the aforementioned architecture.
%U https://aclanthology.org/2020.cmlc-1.5/
%P 31-39
Markdown (Informal)
[The Corpus Query Middleware of Tomorrow – A Proposal for a Hybrid Corpus Query Architecture](https://aclanthology.org/2020.cmlc-1.5/) (Gärtner, CMLC 2020)
ACL