Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model querying, loading local ontology in memory #351

Draft
wants to merge 29 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
be3aa20
load ontologies
ssssarah Oct 24, 2023
a4f2a3d
source or ontology/shape path
ssssarah Oct 24, 2023
f7ddd2d
rm shapes and ontology path separation
ssssarah Oct 24, 2023
90196d2
getter for store and model in forge
ssssarah Oct 24, 2023
1532fe0
expose unimplemented method to tests
ssssarah Oct 24, 2023
ff4dee1
sparql query rewriter out of store
ssssarah Oct 24, 2023
358a70c
enable sparql query from model
ssssarah Oct 24, 2023
02f0555
rm comments
ssssarah Oct 24, 2023
246027d
mark test that is expected to fail
ssssarah Oct 24, 2023
ded1a40
query rewriting rewriting
ssssarah Oct 25, 2023
285baef
model sparql query returned as resource
ssssarah Oct 25, 2023
1821b90
merge sparql query builder and rewriter
ssssarah Oct 25, 2023
394382b
apply limit and offset in query builder
ssssarah Oct 25, 2023
495ed22
renaming
ssssarah Oct 25, 2023
ff9e624
resource id as str instead of str
ssssarah Oct 25, 2023
91c93ab
sparql query builder separate method for construct queries
ssssarah Oct 25, 2023
0986e3d
get file from which a shape originates from in rdf model service from…
ssssarah Oct 25, 2023
fd64e55
rm extra method from rebase
ssssarah Oct 26, 2023
642f340
fix pycodestyle
ssssarah Oct 26, 2023
0c0f713
merge master
ssssarah Nov 27, 2023
a8a7943
rm formatting
ssssarah Nov 27, 2023
d031570
reorganise init of rdf model service
ssssarah Nov 27, 2023
3a96e45
rm empty directory service file
ssssarah Nov 27, 2023
fe4a8bd
rm not_supported and NotImplemented when abstract
ssssarah Nov 27, 2023
f7638a7
smaller test ontology for model querying
ssssarah Nov 28, 2023
9d3cfee
fix pycodestyle
ssssarah Nov 28, 2023
b05de6f
linting and test of querying model
ssssarah Nov 28, 2023
8f09862
Merge branch 'master' into local_rdf_store
ssssarah Nov 28, 2023
e05ca33
Merge branch 'master' into local_rdf_store
ssssarah Dec 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions kgforge/core/archetypes/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import hjson
from pandas import DataFrame
from rdflib import URIRef


from kgforge.core import Resource
from kgforge.core.archetypes import Mapping
Expand All @@ -27,6 +29,11 @@
from kgforge.core.commons.exceptions import ConfigurationError, ValidationError
from kgforge.core.commons.execution import not_supported, run
from kgforge.core.commons.imports import import_class
from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder


DEFAULT_LIMIT = 100
DEFAULT_OFFSET = 0


class Model(ABC):
Expand Down Expand Up @@ -114,6 +121,44 @@ def _template(self, type: str, only_required: bool) -> Dict:

# Mappings.

def sparql(
        self, query: str,
        debug: bool,
        limit: int = DEFAULT_LIMIT,
        offset: int = DEFAULT_OFFSET,
        **params
) -> List[Resource]:
    """Prepare a SPARQL query and hand it over to self._sparql().

    The query goes through three steps before being executed:
    it is passed to SPARQLQueryBuilder.rewrite_sparql together with self.context()
    (only when a context is available and rewriting is enabled), then to
    SPARQLQueryBuilder.apply_limit_and_offset_to_query, and it is finally given to
    SPARQLQueryBuilder.debug_query when debugging is requested.

    :param query: the SPARQL query text
    :param debug: when truthy, the final query is passed to SPARQLQueryBuilder.debug_query
    :param limit: forwarded as `limit` to apply_limit_and_offset_to_query
    :param offset: forwarded as `offset` to apply_limit_and_offset_to_query
    :param params: only the "rewrite" key is read here (defaults to True)
    :return: the value returned by self._sparql() for the final query
    """
    should_rewrite = params.get("rewrite", True)

    if self.context() is not None and should_rewrite:
        final_query = SPARQLQueryBuilder.rewrite_sparql(
            query,
            self.context(),
            metadata_context=None  # TODO smth else?
        )
    else:
        # No context to rewrite with, or rewriting explicitly disabled.
        final_query = query

    final_query = SPARQLQueryBuilder.apply_limit_and_offset_to_query(
        query=final_query,
        limit=limit,
        offset=offset,
        default_limit=DEFAULT_LIMIT,
        default_offset=DEFAULT_OFFSET
    )

    if debug:
        SPARQLQueryBuilder.debug_query(final_query)

    return self._sparql(final_query)

def _sparql(self, query: str) -> List[Resource]:
    """Execute an already prepared SPARQL query; this implementation only calls not_supported().

    :param query: the final SPARQL query text
    """
    # POLICY Should notify of failures with exception QueryingError including a message.
    # POLICY Resource _store_metadata should not be set (default is None).
    # POLICY Resource _synchronized should not be set (default is False).
    not_supported()

def sources(self, pretty: bool) -> Optional[List[str]]:
sources = sorted(self._sources())
if pretty:
Expand Down Expand Up @@ -151,8 +196,8 @@ def mapping(self, entity: str, source: str, type: Callable) -> Mapping:

# Validation.

def schema_id(self, type: str) -> str:
# POLICY Should retrieve the schema id of the given type.
def schema_source(self, type: str) -> str:
# POLICY Should retrieve the schema source of the given type.
not_supported()

def validate(self, data: Union[Resource, List[Resource]],
Expand Down Expand Up @@ -180,9 +225,10 @@ def _initialize_service(self, source: str, **source_config) -> Any:
origin = source_config.pop("origin")
context_config = source_config.pop("context", {})
context_iri = context_config.get("iri", None)

if origin == "directory":
dirpath = Path(source)
return self._service_from_directory(dirpath, context_iri)

return self._service_from_directory(dir_path=Path(source), context_iri=context_iri)
if origin == "url":
return self._service_from_url(source, context_iri)
if origin == "store":
Expand All @@ -193,7 +239,7 @@ def _initialize_service(self, source: str, **source_config) -> Any:

@staticmethod
@abstractmethod
def _service_from_directory(dirpath: Path, context_iri: Optional[str]) -> Any:
def _service_from_directory(dir_path: Path, context_iri: Optional[str]) -> Any:
pass

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion kgforge/core/archetypes/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def _build_resolving_query(text, query_template, deprecated_property, filters, s
Filter(operator=FilterOperator.EQUAL, path=path, value=value)
)
target_query_statements, target_query_filters = query_builder.build(
None, None, resolving_context, *configured_target_filters
None, None, resolving_context, configured_target_filters
)

target_query_statements = ";\n ".join(target_query_statements)
Expand Down
178 changes: 32 additions & 146 deletions kgforge/core/archetypes/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from kgforge.core.archetypes import Mapping, Mapper
from kgforge.core.commons.attributes import repr_class
from kgforge.core.commons.context import Context
from kgforge.core.commons.es_query_builder import ESQueryBuilder
from kgforge.core.commons.exceptions import (
DeprecationError,
DownloadingError,
Expand All @@ -34,62 +35,15 @@
QueryingError,
)
from kgforge.core.commons.execution import not_supported, run
from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder
from kgforge.core.reshaping import collect_values

# NB: Do not 'from kgforge.core.archetypes import Resolver' to avoid cyclic dependency.

# FIXME: need to find a comprehensive way (different than list) to get all SPARQL reserved clauses
from kgforge.core.wrappings.dict import DictWrapper

DEFAULT_LIMIT = 100
DEFAULT_OFFSET = 0
# Reserved SPARQL words that must never be rewritten as context terms.
# Every entry is lower case on purpose: the lookup lower-cases the candidate token
# before testing membership, so a mixed-case entry (e.g. "sameTerm") could never match.
# Duplicated entries were removed; membership results are unaffected by that.
SPARQL_CLAUSES = [
    "where",
    "filter",
    "select",
    "union",
    "limit",
    "construct",
    "optional",
    "bind",
    "values",
    "offset",
    "order by",
    "prefix",
    "graph",
    "distinct",
    "in",
    "as",
    "base",
    "reduced",
    "describe",
    "ask",
    "named",
    "asc",
    "desc",
    "from",
    "regex",
    "str",
    "lang",
    "langmatches",
    "datatype",
    "bound",
    "sameterm",
    "isiri",
    "isuri",
    "isblank",
    "isliteral",
    "group",
    "by",
    "order",
    "minus",
    "not",
    "exists",
]


class Store(ABC):
Expand Down Expand Up @@ -426,21 +380,35 @@ def search(
not_supported()

def sparql(
self, query: str, debug: bool, limit: int = DEFAULT_LIMIT, offset: int = DEFAULT_OFFSET,
self, query: str,
debug: bool,
limit: int = DEFAULT_LIMIT,
offset: int = DEFAULT_OFFSET,
**params
) -> List[Resource]:
rewrite = params.get("rewrite", True)

qr = (
rewrite_sparql(query, self.model_context, self.service.metadata_context)
SPARQLQueryBuilder.rewrite_sparql(
query,
context=self.model_context,
metadata_context=self.service.metadata_context,
)
if self.model_context is not None and rewrite
else query
)
if limit:
qr = _replace_in_sparql(qr, "LIMIT", limit, DEFAULT_LIMIT, r" LIMIT \d+")
if offset:
qr = _replace_in_sparql(qr, "OFFSET", offset, DEFAULT_OFFSET, r" OFFSET \d+")

qr = SPARQLQueryBuilder.apply_limit_and_offset_to_query(
qr,
limit=limit,
offset=offset,
default_limit=DEFAULT_LIMIT,
default_offset=DEFAULT_OFFSET
)

if debug:
self._debug_query(qr)
SPARQLQueryBuilder.debug_query(qr)

return self._sparql(qr)

def _sparql(self, query: str) -> List[Resource]:
Expand All @@ -453,12 +421,16 @@ def elastic(
self, query: str, debug: bool, limit: int = DEFAULT_LIMIT, offset: int = DEFAULT_OFFSET
) -> List[Resource]:
query_dict = json.loads(query)
if limit:
query_dict["size"] = limit
if offset:
query_dict["from"] = offset

query_dict = ESQueryBuilder.apply_limit_and_offset_to_query(
query_dict,
limit=limit, default_limit=None,
offset=offset, default_offset=None
)

if debug:
self._debug_query(query_dict)
ESQueryBuilder.debug_query(query_dict)

return self._elastic(json.dumps(query_dict))

def _elastic(self, query: str) -> List[Resource]:
Expand Down Expand Up @@ -512,96 +484,10 @@ def _initialize_service(
# POLICY Should initialize the access to the store according to its configuration.
pass

@staticmethod
def _debug_query(query):
    """Print the submitted query, followed by an empty line.

    A dictionary is printed as-is on a single line; anything else is split with
    splitlines() and printed one line at a time under the header.

    :param query: the query to show, either a dictionary or a text
    """
    if not isinstance(query, Dict):
        to_print = ["Submitted query:"]
        to_print.extend(query.splitlines())
        print(*to_print, sep="\n ")
    else:
        print("Submitted query:", query)
    print()

def rewrite_uri(self, uri: str, context: Context, **kwargs) -> str:
    """Rewrite a given uri using the store Context.

    This implementation has no logic of its own: it performs no rewriting and
    returns None.

    :param uri: a URI to rewrite.
    :param context: a Store Context object
    :return: str
    """
    return None


def _replace_in_sparql(qr, what, value, default_value, search_regex, replace_if_in_query=True):
is_what_in_query = bool(re.search(f"{search_regex}", qr, flags=re.IGNORECASE))
if is_what_in_query and value and not replace_if_in_query:
raise QueryingError(
f"Value for '{what}' is present in the provided query and set as argument: set 'replace_if_in_query' to True to replace '{what}' when present in the query.")
replace_value = f" {what} {value}" if value else (
f" {what} {default_value}" if default_value else None)
if is_what_in_query and replace_if_in_query and replace_value:
qr = re.sub(f"{search_regex}", replace_value, qr, flags=re.IGNORECASE)
if not is_what_in_query and replace_value:
qr = f"{qr} {replace_value}"
return qr


def rewrite_sparql(query: str, context: Context, metadata_context) -> str:
    """Rewrite local property and type names from Model.template() as IRIs.

    Local names are mapped to IRIs by using a JSON-LD context, i.e. { "@context": { ... }} from a kgforge.core.commons.Context.
    In the case of contexts using prefixed names, prefixes are added to the SPARQL query prologue,
    unless the rewritten query already contains the text "prefix" (case-insensitive).

    :param query: the SPARQL query text to rewrite
    :param context: the context providing the "@context" document, the prefixes and the vocab
    :param metadata_context: an optional context whose "@context" terms are merged first,
        so that terms of `context` take precedence
    :return: the rewritten query, with a PREFIX prologue when one is added
    :raises QueryingError: when a name is neither a reserved word nor a term of the
        merged context, and the context has no vocab
    """
    ctx = {}
    if metadata_context and metadata_context.document:
        ctx.update({
            k: v["@id"] if isinstance(v, Dict) and "@id" in v else v
            for k, v in metadata_context.document["@context"].items()
        })
    # Applied second on purpose: model context terms override metadata context terms.
    ctx.update({
        k: v["@id"] if isinstance(v, Dict) and "@id" in v else v
        for k, v in context.document["@context"].items()
    })
    prefixes = context.prefixes
    has_prefixes = prefixes is not None and len(prefixes.keys()) > 0
    if ctx.get("type") == "@type":
        # Guarded by has_prefixes: prefixes may be None, and `"rdf" in None` raises TypeError.
        if has_prefixes and "rdf" in prefixes:
            ctx["type"] = "rdf:type"
        else:
            ctx["type"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

    # The token is lower-cased before the lookup, so the reserved words are lower-cased too;
    # otherwise mixed-case entries such as "sameTerm" or "isIRI" would never be recognised.
    reserved_words = {str(clause).lower() for clause in SPARQL_CLAUSES}

    def replace(match: Match) -> str:
        # Group 4 is the candidate local name; it is None for the other alternatives
        # of the pattern (a/true/false, quoted literals, "< ... >"), which are kept as-is.
        m4 = match.group(4)
        if m4 is None:
            return match.group(0)

        v = (
            ctx.get(m4, ":" + m4 if context.has_vocab() else None)
            if str(m4).lower() not in reserved_words and not str(m4).startswith("https")
            else m4
        )
        if v is None:
            raise QueryingError(
                f"Failed to construct a valid SPARQL query: add '{m4}'"
                f", define an @vocab in the configured JSON-LD context or provide a fully correct SPARQL query."
            )
        # Group 5 is the optional trailing '.' or ';' that has to be preserved.
        m5 = match.group(5)
        if "//" in v:
            # Values containing "//" are written between angle brackets.
            return f"<{v}>{m5}"

        return f"{v}{m5}"

    g4 = r"([a-zA-Z_]+)"
    g5 = r"([.;]?)"
    g0 = rf"((?<=[\s,[(/|!^])((a|true|false)|{g4}){g5}(?=[\s,\])/|?*+]))"
    g6 = r"(('[^']+')|('''[^\n\r]+''')|(\"[^\"]+\")|(\"\"\"[^\n\r]+\"\"\"))"
    rx = rf"{g0}|{g6}|(?<=< )(.*)(?= >)"
    qr = re.sub(rx, replace, query, flags=re.VERBOSE | re.MULTILINE)

    if not has_prefixes or "prefix" in str(qr).lower():
        return qr

    pfx = "\n".join(f"PREFIX {k}: <{v}>" for k, v in prefixes.items())

    if context.has_vocab():
        pfx = "\n".join([pfx, f"PREFIX : <{context.vocab}>"])

    return f"{pfx}\n{qr}"
14 changes: 12 additions & 2 deletions kgforge/core/commons/es_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def build(
schema: Dict,
resolvers: Optional[List["Resolver"]],
context: Context,
*filters,
filters: List[Filter],
**params,
) -> Tuple[List, List, List]:

Expand All @@ -59,7 +59,7 @@ def build(
m._update_from_dict(schema)
dynamic = m._meta["dynamic"] if "dynamic" in m._meta else dynamic

for index, f in enumerate(*filters):
for index, f in enumerate(filters):
_filter = None
must = None
must_not = None
Expand Down Expand Up @@ -184,6 +184,16 @@ def build(
def build_resource_from_response(query: str, response: Dict, context: Context, *args, **params) -> List[Resource]:
not_supported()

@staticmethod
def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default_offset):
# TODO should there be an elastic search default limit?
if limit:
query["size"] = limit
if offset:
query["from"] = offset

return query


def _look_up_known_parent_paths(f, last_path, property_path, m):
if (
Expand Down
Loading
Loading