-
Notifications
You must be signed in to change notification settings - Fork 581
GH#2256 Introduce a query optimizer concept #2257
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
66a266a
bf71d4a
94ede34
78e9d90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,46 @@ | ||||||||
| from __future__ import annotations | ||||||||
|
|
||||||||
| """ | ||||||||
| This contains standard optimizers for sparql | ||||||||
| """ | ||||||||
| import re | ||||||||
| from rdflib import Literal | ||||||||
| from rdflib.plugins.sparql.operators import Builtin_CONTAINS, Builtin_REGEX | ||||||||
| from rdflib.plugins.sparql.sparql import Query | ||||||||
| from rdflib.plugins.sparql.algebra import CompValue, Join, Values, Expr | ||||||||
| from typing import Any | ||||||||
|
|
||||||||
| """ | ||||||||
| An interface for having optimizers that transform a query algebra hopefully | ||||||||
| in an faster to evaluate version. | ||||||||
| """ | ||||||||
|
|
||||||||
|
|
||||||||
| class SPARQLOptimizer: | ||||||||
| def optimize(self, query: Query) -> Query: | ||||||||
| return query | ||||||||
|
|
||||||||
|
|
||||||||
| class ValuesToTheLeftOfTheJoin(SPARQLOptimizer): | ||||||||
|
|
||||||||
| def optimize(self, query: Query) -> Query: | ||||||||
|
||||||||
| def optimize(self, query: Query) -> Query: | |
| @classmethod | |
| def optimize(cls, query: Query) -> Query: |
As these methods don't use the class state and are side-effect free, it is best to make them class methods; that way, it is clearer to users that they don't have to be concerned with concurrency issues.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| def _optimize_node(self, cv: Any) -> Any: | |
| @classmethod | |
| def _optimize_node(cls, cv: Any) -> Any: |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -10,9 +10,10 @@ | |||||
| from rdflib.plugins.sparql.evaluate import evalQuery | ||||||
| from rdflib.plugins.sparql.parser import parseQuery, parseUpdate | ||||||
| from rdflib.plugins.sparql.sparql import Query | ||||||
| from rdflib.plugins.sparql.optimizer import SPARQLOptimizer | ||||||
| from rdflib.plugins.sparql.update import evalUpdate | ||||||
| from rdflib.query import Processor, Result, UpdateProcessor | ||||||
|
|
||||||
| from typing import List | ||||||
|
|
||||||
| def prepareQuery(queryString, initNs={}, base=None) -> Query: | ||||||
| """ | ||||||
|
|
@@ -63,8 +64,9 @@ def update(self, strOrQuery, initBindings={}, initNs={}): | |||||
|
|
||||||
|
|
||||||
| class SPARQLProcessor(Processor): | ||||||
| def __init__(self, graph): | ||||||
| def __init__(self, graph, optimizers: List[SPARQLOptimizer] = None): | ||||||
|
||||||
| def __init__(self, graph, optimizers: List[SPARQLOptimizer] = None): | |
| def __init__(self, graph, query_translators: Optional[List[_QueryTranslatorType]] = None): |
That way, users can pass methods or free functions, and even have multiple different translator methods on one class.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Your tests should still work fine with that.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| from rdflib import Graph | ||
| from rdflib.plugins.sparql.parser import * | ||
| # from rdflib.plugins.sparql.processor import prepareQuery | ||
| from rdflib.plugins.sparql.processor import translateQuery | ||
| from rdflib.plugins.sparql.processor import parseQuery | ||
| from rdflib.plugins.sparql.optimizer import ValuesToTheLeftOfTheJoin, RegexAsStringFunctionsOptimizer | ||
|
|
||
| query_slow = """ | ||
| PREFIX ex:<https://example.org/> | ||
|
|
||
| SELECT ?x { | ||
| ?x ?y ?z . | ||
| VALUES (?x) { | ||
| (ex:1) | ||
| (ex:2) | ||
| (ex:3) | ||
| } | ||
| } | ||
| """ | ||
|
|
||
| query_fast = """ | ||
| PREFIX ex:<https://example.org/> | ||
|
|
||
| SELECT ?x { | ||
| VALUES (?x) { | ||
| (ex:1) | ||
| (ex:2) | ||
| (ex:3) | ||
| } | ||
| ?x ?y ?z . | ||
| } | ||
| """ | ||
|
|
||
| query_regex = """ | ||
| PREFIX ex:<https://example.org/> | ||
|
|
||
| SELECT ?x { | ||
| ?x ?y ?z . | ||
| FILTER(regex("?z", "hi")) | ||
| } | ||
| """ | ||
|
|
||
| query_contains = """ | ||
| PREFIX ex:<https://example.org/> | ||
|
|
||
| SELECT ?x { | ||
| ?x ?y ?z . | ||
| FILTER(contains("?z", "hi")) | ||
| } | ||
| """ | ||
|
|
||
|
|
||
| def test_values_to_left(): | ||
| qs = _prepare_query(query_slow) | ||
| qf = _prepare_query(query_fast) | ||
| assert qs != qf | ||
| qso = ValuesToTheLeftOfTheJoin().optimize(qs) | ||
|
|
||
| assert qso.algebra == qf.algebra | ||
|
|
||
|
|
||
| def _prepare_query(str_or_query): | ||
| parse_tree = parseQuery(str_or_query) | ||
| query = translateQuery(parse_tree, None, {}) | ||
| return query | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| test_values_to_left() | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While this is valuable, I think it may be better to keep it in `rdflib._contrib`, as we don't necessarily want to offer the same level of compatibility guarantees as we do for other code.