Skip to content

Commit 78950eb

Browse files
author
Mark Hale
committed
GraphDB text-search dialect.
1 parent 3ea6c18 commit 78950eb

File tree

2 files changed

+236
-0
lines changed

2 files changed

+236
-0
lines changed

dockerfiles/config/graphdb.rq

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Creates the Lucene connector instance inst:skosmos by passing the JSON
# definition below (a triple-quoted string literal) to :createConnector.
# Indexed fields: skos:prefLabel, skos:altLabel, skos:hiddenLabel and
# skos:notation; only resources typed skos:Concept are listed under "types".
# NOTE(review): prefLabel is the only field declared with "multivalued": false -
# confirm this is intended for concepts carrying prefLabels in several languages.
PREFIX :<http://www.ontotext.com/connectors/lucene#>
2+
PREFIX inst:<http://www.ontotext.com/connectors/lucene/instance#>
3+
INSERT DATA {
4+
inst:skosmos :createConnector '''
5+
{
6+
"fields": [
7+
{
8+
"fieldName": "prefLabel",
9+
"propertyChain": [
10+
"http://www.w3.org/2004/02/skos/core#prefLabel"
11+
],
12+
"indexed": true,
13+
"stored": false,
14+
"analyzed": true,
15+
"multivalued": false,
16+
"ignoreInvalidValues": false,
17+
"facet": false
18+
},
19+
{
20+
"fieldName": "altLabel",
21+
"propertyChain": [
22+
"http://www.w3.org/2004/02/skos/core#altLabel"
23+
],
24+
"indexed": true,
25+
"stored": false,
26+
"analyzed": true,
27+
"multivalued": true,
28+
"ignoreInvalidValues": false,
29+
"facet": false
30+
},
31+
{
32+
"fieldName": "hiddenLabel",
33+
"propertyChain": [
34+
"http://www.w3.org/2004/02/skos/core#hiddenLabel"
35+
],
36+
"indexed": true,
37+
"stored": false,
38+
"analyzed": true,
39+
"multivalued": true,
40+
"ignoreInvalidValues": false,
41+
"facet": false
42+
},
43+
{
44+
"fieldName": "notation",
45+
"propertyChain": [
46+
"http://www.w3.org/2004/02/skos/core#notation"
47+
],
48+
"indexed": true,
49+
"stored": false,
50+
"analyzed": true,
51+
"multivalued": true,
52+
"ignoreInvalidValues": false,
53+
"facet": false
54+
}
55+
],
56+
"languages": [],
57+
"types": [
58+
"http://www.w3.org/2004/02/skos/core#Concept"
59+
],
60+
"readonly": false,
61+
"detectFields": false,
62+
"importGraph": false,
63+
"skipInitialIndexing": false,
64+
"boostProperties": [],
65+
"stripMarkup": false
66+
}
67+
''' .
68+
}

model/sparql/GraphDBTextSparql.php

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
<?php
2+
3+
/*
 * Register the namespace prefixes used in the graphdb-text query snippets:
 * con: and con-inst: (Ontotext Lucene connector and its instances) and afn:
 * (ARQ function namespace, used for afn:localname).
 */
4+
EasyRdf\RdfNamespace::set('con', 'http://www.ontotext.com/connectors/lucene#'); // @codeCoverageIgnore
5+
EasyRdf\RdfNamespace::set('con-inst', 'http://www.ontotext.com/connectors/lucene/instance#'); // @codeCoverageIgnore
6+
EasyRdf\RdfNamespace::set('afn', 'http://jena.hpl.hp.com/ARQ/function#'); // @codeCoverageIgnore
7+
8+
/**
 * SPARQL dialect for GraphDB that builds text-search conditions against the
 * Lucene connector instance con-inst:skosmos instead of the generic
 * implementation inherited from GenericSparql.
 */
class GraphDBTextSparql extends GenericSparql
{
    /**
     * Maximum number of results requested from the graphdb-text index
     * (emitted as con:limit). The original note states that graphdb-text
     * defaults to 10000 (not verified here).
     */
    const MAX_N = 100000;

    /**
     * Characters that need to be quoted for the Lucene query parser.
     * See http://lucene.apache.org/core/4_10_1/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
     *
     * Note: '*' is deliberately not included because we want wildcard expansion.
     */
    const LUCENE_ESCAPE_CHARS = ' +-&|!(){}[]^"~?:\\/';

    /**
     * Make a graphdb-text query condition that narrows the amount of search
     * results in term searches.
     *
     * The returned snippet binds ?field, ?query, ?s and ?score.
     *
     * @param string $term search term
     * @param string $property property to search, given as a SPARQL term whose
     *        local name is the connector field (e.g. 'skos:prefLabel' or a
     *        variable such as '?prop').
     *        NOTE(review): the default '' is interpolated as-is and would yield
     *        "afn:localname()" - confirm no caller relies on the default.
     * @param string $langClause not currently supported (ignored)
     * @return string SPARQL text search clause
     */
    private function createTextQueryCondition($term, $property = '', $langClause = '')
    {
        // construct the lucene search term for graphdb-text

        // 1. Ensure characters with special meaning in Lucene are escaped
        $lucenemap = array();
        foreach (str_split(self::LUCENE_ESCAPE_CHARS) as $char) {
            $lucenemap[$char] = '\\' . $char; // escape with a backslash
        }
        $term = strtr($term, $lucenemap);

        // 2. Ensure proper SPARQL quoting (the term ends up inside a
        //    single-quoted SPARQL string literal)
        $term = str_replace('\\', '\\\\', $term); // escape backslashes
        $term = str_replace("'", "\\'", $term); // escape single quotes

        $maxResults = self::MAX_N;

        // The snippet deliberately ends with ';' exactly as before; callers
        // append their own patterns after it.
        $query = <<<EOQ
bind(afn:localname($property) as ?field)
bind(concat(?field, ':', '\"$term\"') as ?query)
[] a con-inst:skosmos ;
con:query ?query ;
con:entities ?s ;
con:limit $maxResults .
?s con:score ?score ;
EOQ;
        return $query;
    }

    /**
     * Generate graphdb-text search condition for matching labels in SPARQL
     * @param string $term search term
     * @param string $searchLang language code used for matching labels (null means any language)
     * @return string sparql query snippet
     */
    protected function generateConceptSearchQueryCondition($term, $searchLang)
    {
        # make text query clauses
        // the language clause is passed along but currently ignored by
        // createTextQueryCondition()
        $langClause = $searchLang ? '?langParam' : '';
        $textcond = $this->createTextQueryCondition($term, '?prop', $langClause);

        return $textcond;
    }

    /**
     * This function generates graphdbtext language clauses from the search language tag
     * @param string $lang
     * @return string formatted language clause
     */
    protected function generateLangClause($lang)
    {
        return "'lang:$lang*'";
    }

    /**
     * Generates sparql query clauses used for ordering by an expression.
     * This dialect returns the expression unchanged.
     * @param string $expression the expression used for ordering the results
     * @param string $lang language (unused)
     * @return string sparql order by clause
     */
    private function formatOrderBy($expression, $lang)
    {
        return $expression;
    }

    /**
     * Generates the graphdb-text-specific sparql query used for rendering the alphabetical index.
     * @param string $letter the letter (or special class) to search for
     * @param string $lang language of labels
     * @param int|null $limit limits the amount of results
     * @param int|null $offset offsets the result set
     * @param array|null $classes
     * @param boolean $showDeprecated whether to include deprecated concepts in the result (default: false)
     * @param \EasyRdf\Resource|null $qualifier alphabetical list qualifier resource or null (default: null)
     * @return string sparql query
     */
    public function generateAlphabeticalListQuery($letter, $lang, $limit = null, $offset = null, $classes = null, $showDeprecated = false, $qualifier = null)
    {
        if (in_array($letter, array('*', '0-9', '!*'), true)) {
            // text index cannot support special character queries, use the generic implementation for these
            return parent::generateAlphabeticalListQuery($letter, $lang, $limit, $offset, $classes, $showDeprecated, $qualifier);
        }

        $gc = $this->graphClause;
        $classes = ($classes) ? $classes : array('http://www.w3.org/2004/02/skos/core#Concept');
        $values = $this->formatValues('?type', $classes, 'uri');
        $limitandoffset = $this->formatLimitAndOffset($limit, $offset);

        # make text query clause
        $langClause = $this->generateLangClause($lang);
        $textcondPref = $this->createTextQueryCondition($letter . '*', 'skos:prefLabel', $langClause);
        $textcondAlt = $this->createTextQueryCondition($letter . '*', 'skos:altLabel', $langClause);
        $orderbyclause = $this->formatOrderBy("LCASE(?match)", $lang) . " STR(?s) LCASE(STR(?qualifier))";

        // Both values are embedded in single-quoted SPARQL literals below, so
        // backslashes and single quotes must be escaped (backslashes first),
        // mirroring the quoting applied to the term in createTextQueryCondition().
        $sparqlSearch = array('\\', "'");
        $sparqlReplace = array('\\\\', "\\'");
        $lcletter = str_replace($sparqlSearch, $sparqlReplace, mb_strtolower($letter, 'UTF-8')); // lower case, UTF-8 safe
        $langLiteral = str_replace($sparqlSearch, $sparqlReplace, $lang);

        $qualifierClause = $qualifier ? "OPTIONAL { ?s <" . $qualifier->getURI() . "> ?qualifier }" : "";

        $filterDeprecated = "";
        if (!$showDeprecated) {
            $filterDeprecated = "FILTER NOT EXISTS { ?s owl:deprecated true }";
        }

        // NOTE(review): the text condition binds ?s and ?score only; nothing
        // visible in this class binds ?match before it is used in the
        // STRSTARTS filters and BIND clauses below - confirm against a live
        // GraphDB instance that these branches return rows.
        $query = <<<EOQ
SELECT DISTINCT ?s ?label ?alabel ?qualifier
WHERE {
$gc {
{
$textcondPref
FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
FILTER EXISTS { ?s skos:prefLabel ?match }
BIND(?match as ?label)
}
UNION
{
$textcondAlt
FILTER(STRSTARTS(LCASE(STR(?match)), '$lcletter'))
FILTER EXISTS { ?s skos:altLabel ?match }
BIND(?match as ?alabel)
{
?s skos:prefLabel ?label .
FILTER (langMatches(LANG(?label), '$langLiteral'))
}
}
?s a ?type .
$qualifierClause
$filterDeprecated
} $values
}
ORDER BY $orderbyclause $limitandoffset
EOQ;
        return $query;
    }
}

0 commit comments

Comments
 (0)