Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model querying, loading local ontology in memory #351

Draft
wants to merge 29 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
be3aa20
load ontologies
ssssarah Oct 24, 2023
a4f2a3d
source or ontology/shape path
ssssarah Oct 24, 2023
f7ddd2d
rm shapes and ontology path separation
ssssarah Oct 24, 2023
90196d2
getter for store and model in forge
ssssarah Oct 24, 2023
1532fe0
expose unimplemented method to tests
ssssarah Oct 24, 2023
ff4dee1
sparql query rewriter out of store
ssssarah Oct 24, 2023
358a70c
enable sparql query from model
ssssarah Oct 24, 2023
02f0555
rm comments
ssssarah Oct 24, 2023
246027d
mark test that is expected to fail
ssssarah Oct 24, 2023
ded1a40
query rewriting rewriting
ssssarah Oct 25, 2023
285baef
model sparql query returned as resource
ssssarah Oct 25, 2023
1821b90
merge sparql query builder and rewriter
ssssarah Oct 25, 2023
394382b
apply limit and offset in query builder
ssssarah Oct 25, 2023
495ed22
renaming
ssssarah Oct 25, 2023
ff9e624
resource id as str instead of str
ssssarah Oct 25, 2023
91c93ab
sparql query builder separate method for construct queries
ssssarah Oct 25, 2023
0986e3d
get file from which a shape originates from in rdf model service from…
ssssarah Oct 25, 2023
fd64e55
rm extra method from rebase
ssssarah Oct 26, 2023
642f340
fix pycodestyle
ssssarah Oct 26, 2023
0c0f713
merge master
ssssarah Nov 27, 2023
a8a7943
rm formatting
ssssarah Nov 27, 2023
d031570
reorganise init of rdf model service
ssssarah Nov 27, 2023
3a96e45
rm empty directory service file
ssssarah Nov 27, 2023
fe4a8bd
rm not_supported and NotImplemented when abstract
ssssarah Nov 27, 2023
f7638a7
smaller test ontology for model querying
ssssarah Nov 28, 2023
9d3cfee
fix pycodestyle
ssssarah Nov 28, 2023
b05de6f
linting and test of querying model
ssssarah Nov 28, 2023
8f09862
Merge branch 'master' into local_rdf_store
ssssarah Nov 28, 2023
e05ca33
Merge branch 'master' into local_rdf_store
ssssarah Dec 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions kgforge/core/archetypes/dataset_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ def types(self) -> Optional[List[str]]:
return list(self.model.mappings(self.model.source, False).keys())

def search(
self, filters: List[Union[Dict, Filter]], resolvers: Optional[List[Resolver]] = None,
**params
self, resolvers: Optional[List[Resolver]], filters: List[Union[Dict, Filter]], **params
) -> Optional[List[Resource]]:
"""Search within the database.
:param map: bool
Expand Down
56 changes: 53 additions & 3 deletions kgforge/core/archetypes/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,23 @@
import json
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, Type
from typing import Any, Dict, List, Optional, Union, Type, Tuple

import hjson
from pandas import DataFrame


from kgforge.core.resource import Resource
from kgforge.core.archetypes.mapping import Mapping
from kgforge.core.commons.attributes import repr_class, sort_attrs
from kgforge.core.commons.context import Context
from kgforge.core.commons.exceptions import ConfigurationError, ValidationError
from kgforge.core.commons.execution import run
from kgforge.core.commons.imports import import_class
from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder

DEFAULT_LIMIT = 100
DEFAULT_OFFSET = 0


class Model(ABC):
Expand Down Expand Up @@ -117,6 +122,48 @@ def _template(self, type: str, only_required: bool) -> Dict:

# Mappings.

def sparql(
self, query: str,
debug: bool,
limit: int = DEFAULT_LIMIT,
offset: int = DEFAULT_OFFSET,
**params
) -> List[Resource]:
rewrite = params.get("rewrite", True)

context_as_dict, prefixes, vocab = self.get_context_prefix_vocab()

qr = (
SPARQLQueryBuilder.rewrite_sparql(
query,
context_as_dict=context_as_dict,
prefixes=prefixes,
vocab=vocab
)
if self.context() is not None and rewrite
else query
)

qr = SPARQLQueryBuilder.apply_limit_and_offset_to_query(
query=qr,
limit=limit,
offset=offset,
default_limit=DEFAULT_LIMIT,
default_offset=DEFAULT_OFFSET
)

if debug:
SPARQLQueryBuilder.debug_query(qr)

return self._sparql(qr)

@abstractmethod
def _sparql(self, query: str) -> List[Resource]:
# POLICY Should notify of failures with exception QueryingError including a message.
# POLICY Resource _store_metadata should not be set (default is None).
# POLICY Resource _synchronized should not be set (default is False).
...

def sources(self, pretty: bool) -> Optional[List[str]]:
sources = sorted(self._sources())
if pretty:
Expand Down Expand Up @@ -189,8 +236,7 @@ def _initialize_service(self, source: str, **source_config) -> Any:
context_config = source_config.pop("context", {})
context_iri = context_config.get("iri", None)
if origin == "directory":
dirpath = Path(source)
return self._service_from_directory(dirpath, context_iri)
return self._service_from_directory(Path(source), context_iri)
if origin == "url":
return self._service_from_url(source, context_iri)
if origin == "store":
Expand All @@ -215,3 +261,7 @@ def _service_from_store(
store: 'Store', context_config: Optional[dict], **source_config
) -> Any:
...

@abstractmethod
def get_context_prefix_vocab(self) -> Tuple[Optional[Dict], Optional[Dict], Optional[str]]:
...
10 changes: 1 addition & 9 deletions kgforge/core/archetypes/read_only_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from kgforge.core.commons.exceptions import (
DownloadingError,
)
from kgforge.core.commons.execution import not_supported
from kgforge.core.commons.sparql_query_builder import SPARQLQueryBuilder
from kgforge.core.reshaping import collect_values
from kgforge.core.wrappings import Filter
Expand Down Expand Up @@ -54,16 +53,9 @@ def __init__(
def __repr__(self) -> str:
return repr_class(self)

@staticmethod
def _context_to_dict(context: Context):
return {
k: v["@id"] if isinstance(v, Dict) and "@id" in v else v
for k, v in context.document["@context"].items()
}

def get_context_prefix_vocab(self) -> Tuple[Optional[Dict], Optional[Dict], Optional[str]]:
return (
ReadOnlyStore._context_to_dict(self.model_context().document),
Context.context_to_dict(self.model_context()),
self.model_context().prefixes,
self.model_context().vocab
)
Expand Down
26 changes: 24 additions & 2 deletions kgforge/core/archetypes/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@

from kgforge.core.archetypes.read_only_store import ReadOnlyStore, DEFAULT_LIMIT, DEFAULT_OFFSET
from kgforge.core.archetypes.model import Model
from kgforge.core.commons import Context
from kgforge.core.resource import Resource
from kgforge.core.archetypes.mapping import Mapping
from kgforge.core.archetypes.mapper import Mapper
from kgforge.core.commons.attributes import repr_class
from kgforge.core.commons.context import Context
from kgforge.core.commons.es_query_builder import ESQueryBuilder
from kgforge.core.commons.exceptions import (
DeprecationError,
Expand All @@ -32,7 +32,7 @@
UpdatingError,
UploadingError
)
from kgforge.core.commons.execution import not_supported, run
from kgforge.core.commons.execution import run


class Store(ReadOnlyStore):
Expand Down Expand Up @@ -292,3 +292,25 @@ def _freeze_one(self, resource: Resource) -> None:
self._freeze_one(v)
if hasattr(resource, "id"):
resource.id = self.versioned_id_template.format(x=resource)

# Utils.

@abstractmethod
def _initialize_service(
self,
endpoint: Optional[str],
bucket: Optional[str],
token: Optional[str],
searchendpoints: Optional[Dict] = None,
**store_config,
) -> Any:
# POLICY Should initialize the access to the store according to its configuration.
pass

def rewrite_uri(self, uri: str, context: Context, **kwargs) -> str:
"""Rewrite a given uri using the store Context
:param uri: a URI to rewrite.
:param context: a Store Context object
:return: str
"""
pass
7 changes: 7 additions & 0 deletions kgforge/core/commons/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,10 @@ def is_http_iri(self):

def has_vocab(self):
return self.vocab is not None

@staticmethod
def context_to_dict(context: 'Context'):
return {
k: v["@id"] if isinstance(v, Dict) and "@id" in v else v
for k, v in context.document["@context"].items()
}
10 changes: 10 additions & 0 deletions kgforge/core/commons/es_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,16 @@ def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default

return query

@staticmethod
def apply_limit_and_offset_to_query(query, limit, default_limit, offset, default_offset):
# TODO should there be an elastic search default limit?
if limit:
query["size"] = limit
if offset:
query["from"] = offset

return query


def _look_up_known_parent_paths(f, last_path, property_path, m):
if (
Expand Down
8 changes: 8 additions & 0 deletions kgforge/core/forge.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,14 @@ def __init__(self, configuration: Union[str, Dict], **kwargs) -> None:
# Formatters.
self._formatters: Optional[Dict[str, str]] = config.pop("Formatters", None)

def get_model(self) -> Model:
"""Exposes the model."""
return self._model

def get_store(self) -> Store:
"""Exposes the store."""
return self._store

@catch
def prefixes(self, pretty: bool = True) -> Optional[Dict[str, str]]:
"""
Expand Down
8 changes: 7 additions & 1 deletion kgforge/specializations/models/demo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
class DemoModel(Model):
"""An example to show how to implement a Model and to demonstrate how it is used."""

def _sparql(self, query: str) -> List[Resource]:
raise not_supported()

def get_context_prefix_vocab(self) -> Tuple[Optional[Dict], Optional[Dict], Optional[str]]:
raise not_supported()

# Vocabulary.

def _prefixes(self) -> Dict[str, str]:
Expand Down Expand Up @@ -96,7 +102,7 @@ def _validate_one(self, resource: Resource, type_: str) -> None:
# Utils.

@staticmethod
def _service_from_directory(dirpath: Path, context_iri: str, **dir_config):
def _service_from_directory(dirpath: Path, context_iri: Optional[str]):
return ModelLibrary(dirpath)

@staticmethod
Expand Down
20 changes: 18 additions & 2 deletions kgforge/specializations/models/rdf/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, shape: Shape) -> None:
@abstractmethod
def constraint(cls) -> URIRef:
"""Returns the Shacl constraint URI of the collector"""
raise NotImplementedError()
...

@abstractmethod
def collect(
Expand All @@ -64,7 +64,7 @@ def collect(
properties, attributes: Tuple(list,dict), the collected properties and attributes
respectively
"""
raise NotImplementedError()
...

def get_shape_target_classes(self) -> List:
"""Returns a list of target and implicit classes if any of the shape
Expand Down Expand Up @@ -488,3 +488,19 @@ def get_node_path(node: NodeProperties, path: URIRef, field: str):
else:
result.append(values)
return result


ALL_COLLECTORS = [
AndCollector,
OrCollector,
PropertyCollector,
NodeCollector,
PropertyCollector,
MinCountCollector,
DatatypeCollector,
InCollector,
ClassCollector,
NodeKindCollector,
XoneCollector,
HasValueCollector
]
88 changes: 88 additions & 0 deletions kgforge/specializations/models/rdf/pyshacl_shape_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from pyshacl import Shape, ShapesGraph
from rdflib import Graph, URIRef
from pyshacl.constraints import ALL_CONSTRAINT_PARAMETERS

from typing import List, Optional, Set, Tuple, Dict

from kgforge.specializations.models.rdf.collectors import ALL_COLLECTORS


ALL_COLLECTORS_MAP = {c.constraint(): c for c in ALL_COLLECTORS}


class ShapeWrapper(Shape):
__slots__ = ('__dict__',)

def __init__(self, shape: Shape) -> None:
super().__init__(shape.sg, shape.node, shape._p, shape._path, shape.logger)

def parameters(self):
return (
p for p, v in self.sg.predicate_objects(self.node)
if p in ALL_CONSTRAINT_PARAMETERS
)

def traverse(self, predecessors: Set[URIRef]) -> Tuple[List, Dict]:
""" traverses the Shape SHACL properties to collect constrained properties

This function is injected to pyshacl Shape object in order to traverse the Shacl graph.
It will call a specific collector depending on the SHACL property present in the NodeShape

Args:
predecessors: list of nodes that have being traversed, used to break circular
recursion

Returns:
properties, attributes: Tuple(list,dict), the collected properties and attributes
respectively gathered from the collectors
"""

parameters = self.parameters()
properties = []
attributes = {}
done_collectors = set()
for param in iter(parameters):
if param in ALL_COLLECTORS_MAP:
constraint_collector = ALL_COLLECTORS_MAP[param]
if constraint_collector not in done_collectors:
c = constraint_collector(self)
predecessors.add(self.node)
props, attrs = c.collect(predecessors)
if attrs:
attributes.update(attrs)
if props:
properties.extend(props)
done_collectors.add(constraint_collector)
if predecessors:
predecessors.remove(self.node)
else:
# FIXME: there are some SHACL constrains that are not implemented
# raise IndexError(f"{param} not implemented!")
pass

return properties, attributes


class ShapesGraphWrapper(ShapesGraph):

def __init__(self, graph: Graph) -> None:
super().__init__(graph)
# the following line triggers the shape loading -> see pyshacl.ShapesGraph
self._shapes = self.shapes

def lookup_shape_from_node(self, node: URIRef) -> Optional[ShapeWrapper]:
""" Overwrite function to inject the transverse function for only to requested nodes.

Args:
node (URIRef): The node to look up.

Returns:
Shape: The Shacl shape of the requested node.
"""
shape: Shape = self._node_shape_cache.get(node, None)
if shape:
return ShapeWrapper(shape)
# if not hasattr(shape_wrapper, "traverse"):
# shape_wrapper.traverse = types.MethodType(traverse, shape_wrapper)
# return shape_wrapper
return None
Loading
Loading