Skip to content

Commit

Permalink
2024 10 iri improvements (#2)
Browse files Browse the repository at this point in the history
* Update .gitignore and README

* Avoid boolean trap in VocabIRI methods

* Modernize typing

* Include IRI in query for easier results handling

* Refactor subject bool to triple_position enum

* Add graph_url check in VocabIRI

* Add some unit tests for VocabIRI and ProductIRI

* Add error handling and tests to convert_json_object

* Add back prefix to triples query

---------

Co-authored-by: Chris Mutel <[email protected]>
  • Loading branch information
simonvanlierde and cmutel authored Oct 9, 2024
1 parent 0609770 commit eb341d1
Show file tree
Hide file tree
Showing 7 changed files with 305 additions and 63 deletions.
36 changes: 36 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,39 @@ dmypy.json

# Pyre type checker
.pyre/


### MacOS ###
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

### VS Code ###
.vscode/
*.code-workspace

# Local History for Visual Studio Code
.history/
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ please [file an issue][Issue Tracker] along with a detailed description.
You can build the documentation locally by installing the documentation Conda environment:

```bash
conda env create -f docs/environment.yml
conda env create -f docs/environment.yaml
```

activating the environment
Expand Down
138 changes: 78 additions & 60 deletions sentier_data_tools/iri/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from typing import List, Optional, Union
"""Module for querying RDF triples from sentier.dev vocabularies.
This module provides base classes and utility functions to handle IRIs
and retrieve RDF triples from vocabularies like products and units using SPARQL queries.
"""

from rdflib import Graph, URIRef

from sentier_data_tools.iri.utils import (
VOCAB_FUSEKI,
TriplePosition,
convert_json_object,
display_value_for_uri,
execute_sparql_query,
Expand All @@ -13,76 +18,89 @@


class VocabIRI(URIRef):
def triples(self, subject: bool = True, limit: Optional[int] = 25) -> List[tuple]:
"""Return a list of triples with `rdflib` objects"""
if subject:
QUERY = f"""
SELECT ?p ?o
FROM <{self.graph_url}>
WHERE {{
<{str(self)}> ?p ?o
}}
"""
else:
QUERY = f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?s ?p
FROM <{self.graph_url}>
WHERE {{
?s ?p <{str(self)}>
}}
"""
"""Base class for standard queries for IRIs from sentier.dev vocabularies."""

def triples(
self,
*,
iri_position: TriplePosition = TriplePosition.SUBJECT,
limit: int | None = 25,
) -> list[tuple]:
"""Get triples from a sentier.dev vocabulary for the given IRI.
Args:
iri_position (TriplePosition, optional): The IRI position in the triple
(SUBJECT, PREDICATE, or OBJECT). Defaults to TriplePosition.SUBJECT.
limit (int | None, optional): The maximum number of triples to return.
Defaults to 25.
Returns:
list[tuple]: A list of triples from a sentier.dev vocabulary.
"""
# Ensure a vocabulary graph_url is defined in a subclass
if not getattr(self, "graph_url", None):
error_msg = (
f"{self.__class__.__name__} must define a 'graph_url' attribute "
"to indicate the vocabulary graph URL."
)
logger.error(error_msg)
raise AttributeError(error_msg)

# pylint: disable=no-member
QUERY = f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?s ?p ?o
FROM <{self.graph_url}>
WHERE {{
VALUES ?{iri_position.value} {{ <{str(self)}> }}
?s ?p ?o
}}
"""

if limit is not None:
QUERY += f"LIMIT {int(limit)}"
logger.debug(f"Executing query:\n{QUERY}")
results = execute_sparql_query(QUERY)
logger.info(f"Retrieved {len(results)} triples from {VOCAB_FUSEKI}")

if subject:
return [
(
URIRef(str(self)),
convert_json_object(line["p"]),
convert_json_object(line["o"]),
)
for line in results
]
else:
return [
(
convert_json_object(line["s"]),
convert_json_object(line["p"]),
URIRef(str(self)),
)
for line in results
]
return [
tuple(convert_json_object(line[key]) for key in ["s", "p", "o"])
for line in results
]

def __repr__(self) -> str:
return self.display()

def display(self) -> str:
return display_value_for_uri(str(self), self.kind, self.graph_url)

def graph(self, subject: bool = True) -> Graph:
"""Return an `rdflib` graph of the data from the sentier.dev vocabulary for this IRI"""
def graph(
self,
*,
iri_position: TriplePosition = TriplePosition.SUBJECT,
) -> Graph:
"""Return an `rdflib` graph of the data from the sentier.dev vocabulary for this IRI."""
graph = Graph()
for triple in self.triples(subject=subject, limit=None):
for triple in self.triples(
iri_position=iri_position,
limit=None,
):
graph.add(triple)
return graph

def narrower(
self, include_self: bool = False, raw_strings: bool = False
) -> Union[list["VocabIRI"], list[str]]:
) -> list["VocabIRI"] | list[str]:
QUERY = f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?o ?s
FROM <{self.graph_url}>
WHERE {{
<{str(self)}> skos:narrower+ ?o .
?o skos:broader ?s .
}}"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?o ?s
FROM <{self.graph_url}>
WHERE {{
<{str(self)}> skos:narrower+ ?o .
?o skos:broader ?s .
}}"""
logger.debug(f"Executing query:\n{QUERY}")
results = [
(elem["s"]["value"], elem["o"]["value"])
Expand All @@ -98,16 +116,16 @@ def narrower(

def broader(
self, include_self: bool = False, raw_strings: bool = False
) -> Union[list["VocabIRI"], list[str]]:
) -> list["VocabIRI"] | list[str]:
QUERY = f"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?o ?s
FROM <{self.graph_url}>
WHERE {{
<{str(self)}> skos:broader+ ?o .
?o skos:narrower ?s .
}}"""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?o ?s
FROM <{self.graph_url}>
WHERE {{
<{str(self)}> skos:broader+ ?o .
?o skos:narrower ?s .
}}"""
logger.debug(f"Executing query:\n{QUERY}")
results = [
(elem["s"]["value"], elem["o"]["value"])
Expand Down
20 changes: 18 additions & 2 deletions sentier_data_tools/iri/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import locale
import platform
from collections import defaultdict, deque
from enum import Enum
from functools import lru_cache
import os
from typing import Union

from rdflib import Literal, URIRef
from SPARQLWrapper import JSON, SPARQLWrapper

from sentier_data_tools.logs import stdout_feedback_logger as logger

if language := os.environ.get("SDT_LOCALE"):
pass
elif platform.system() == "Windows":
Expand All @@ -31,13 +34,18 @@ def execute_sparql_query(query: str) -> list:
return sparql.queryAndConvert()["results"]["bindings"]


def convert_json_object(obj: dict) -> Union[URIRef, Literal]:
def convert_json_object(obj: dict) -> URIRef | Literal:
if "value" not in obj:
error_msg = f"Missing 'value' key in object: {obj}"
logger.error(error_msg)
raise ValueError(error_msg)

if obj["type"] == "literal":
return Literal(
obj["value"], lang=obj.get("xml:lang"), datatype=obj.get("datatype")
)
else:
return URIRef(obj["value"])
return URIRef(str(obj["value"]))


@lru_cache(maxsize=2048)
Expand Down Expand Up @@ -99,3 +107,11 @@ def resolve_hierarchy(
queue.append(code)

return ordered


class TriplePosition(Enum):
    """Position of a term within an RDF triple: subject, predicate, or object.

    Each member's value is the one-letter name ("s", "p", "o") that
    VocabIRI.triples interpolates as the SPARQL variable for that position
    and uses as the key when reading result bindings.
    """

    SUBJECT = "s"
    PREDICATE = "p"
    OBJECT = "o"
Empty file added tests/iri/__init__.py
Empty file.
109 changes: 109 additions & 0 deletions tests/iri/test_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Unit tests for IRI classes and their helper functions.
These tests focus on verifying the core functionality of the VocabIRI class and its
subclasses (ProductIRI and UnitIRI). The primary behavior of ProductIRI and UnitIRI,
such as SPARQL querying and triple retrieval, should be covered in integration tests.
"""

from unittest.mock import patch

import pytest
from rdflib import Literal, URIRef

from sentier_data_tools.iri.main import ProductIRI, VocabIRI
from sentier_data_tools.iri.utils import TriplePosition


# Test Vocab IRI class without graph_url attribute
class IncompleteVocabIRI(VocabIRI):
    """Subclass of VocabIRI without graph_url to test the missing graph_url error."""


@pytest.fixture
def incomplete_vocab_iri() -> IncompleteVocabIRI:
    """Provide an IncompleteVocabIRI instance, i.e. an IRI whose class lacks graph_url."""
    iri = "https://example.org/incomplete/123"
    return IncompleteVocabIRI(iri)


def test_vocab_iri_missing_graph_url(
    incomplete_vocab_iri: IncompleteVocabIRI,
) -> None:
    """Calling triples() on a subclass without graph_url must raise AttributeError."""
    expected_message = "must define a 'graph_url' attribute"
    with pytest.raises(AttributeError, match=expected_message):
        incomplete_vocab_iri.triples()


# Test base functionality of ProductIRI for each TriplePosition value


@pytest.fixture
def product_iri() -> ProductIRI:
    """Provide a ProductIRI built from an example product URL."""
    iri = "https://example.com/product/123"
    return ProductIRI(iri)


def mock_sparql_result(
    mock_execute_sparql_query: patch, iri_value: URIRef, position: TriplePosition
) -> None:
    """Configure the mocked SPARQL executor to return one binding row.

    The row starts with default subject/predicate/object bindings; the binding
    at the requested position is then replaced by a URI binding for iri_value.
    """
    # Map each triple position to the binding key it overrides in the row.
    binding_key_for = {
        TriplePosition.SUBJECT: "s",
        TriplePosition.PREDICATE: "p",
        TriplePosition.OBJECT: "o",
    }

    row = {
        "s": {"type": "uri", "value": "https://example.com/default_subject"},
        "p": {"type": "uri", "value": "https://example.com/default_predicate"},
        "o": {"type": "literal", "value": "default_object"},
    }

    # An unrecognised position leaves the default row untouched.
    key = binding_key_for.get(position)
    if key is not None:
        row[key] = {"type": "uri", "value": iri_value}

    mock_execute_sparql_query.return_value = [row]


@pytest.mark.parametrize(
    "position",
    [TriplePosition.SUBJECT, TriplePosition.PREDICATE, TriplePosition.OBJECT],
)
@patch("sentier_data_tools.iri.main.execute_sparql_query")
def test_product_iri_triples_for_all_positions(
    mock_execute_sparql_query: patch, product_iri: ProductIRI, position: TriplePosition
) -> None:
    """Check ProductIRI.triples() output for every TriplePosition value.

    The SPARQL executor is mocked to return a single row in which the product
    IRI occupies the requested position; the remaining terms are defaults.
    """
    iri_text = str(product_iri)
    default_subject = "https://example.com/default_subject"
    default_predicate = "https://example.com/default_predicate"
    default_object = "default_object"

    # Arrange: one mocked result row with the IRI at the given position.
    mock_sparql_result(mock_execute_sparql_query, iri_text, position)

    # Act
    found = product_iri.triples(iri_position=position)

    # Assert: a single, well-typed triple came back.
    assert found, "Expected triples but got empty results"
    subject, predicate, obj = found[0]
    assert isinstance(subject, URIRef)
    assert isinstance(predicate, URIRef)
    assert len(found) == 1

    # Per position: (object type, subject text, predicate text, object text).
    expected_by_position = {
        TriplePosition.SUBJECT: (Literal, iri_text, default_predicate, default_object),
        TriplePosition.PREDICATE: (Literal, default_subject, iri_text, default_object),
        TriplePosition.OBJECT: (URIRef, default_subject, default_predicate, iri_text),
    }
    if position in expected_by_position:
        obj_type, want_subject, want_predicate, want_object = expected_by_position[
            position
        ]
        assert isinstance(obj, obj_type)
        assert str(subject) == want_subject
        assert str(predicate) == want_predicate
        assert str(obj) == want_object
Loading

0 comments on commit eb341d1

Please sign in to comment.