Skip to content

Commit

Permalink
refactor: re-organize some common functions and variable references (#6)
Browse files Browse the repository at this point in the history
* reorganize common functions and variable refs

Signed-off-by: Jaideep Rao <[email protected]>

* adding small doc changes

Signed-off-by: tarilabs <[email protected]>

---------

Signed-off-by: Jaideep Rao <[email protected]>
Signed-off-by: tarilabs <[email protected]>
Co-authored-by: tarilabs <[email protected]>
  • Loading branch information
jaideepr97 and tarilabs authored Dec 24, 2024
1 parent ece9e4b commit c7ceea6
Show file tree
Hide file tree
Showing 15 changed files with 251 additions and 121 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ __pycache__
tmp
efi-variable-store
*.onnx
.DS_store
.vscode

# Generated python library wheels
dist
Expand Down
89 changes: 20 additions & 69 deletions olot/basics.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,39 @@

import hashlib
import os
from pathlib import Path
from pprint import pprint
import tarfile
from typing import Dict, List
import typing
import click
import tarfile
import gzip
import click

from .oci.oci_config import OCIManifestConfig

from .oci.oci_image_index import OCIImageIndex
from .oci.oci_image_manifest import OCIImageManifest, ContentDescriptor

from .oci.oci_image_layout import ImageLayoutVersion, OCIImageLayout

class HashingWriter:
    """Write-through wrapper that hashes every chunk before forwarding it.

    Each ``write`` call feeds the data to ``hash_func`` and then hands the
    same bytes to ``base_writer``; ``tell`` and ``close`` are forwarded to
    ``base_writer`` unchanged.

    Args:
        base_writer: Object providing ``write``, ``tell`` and ``close``.
        hash_func: Hash object exposing ``update``; when omitted (or falsy)
            a new ``hashlib.sha256()`` is created.
    """

    def __init__(self, base_writer, hash_func=None):
        self.base_writer = base_writer
        # Fall back to SHA-256 when the caller supplies no hash object.
        self.hash_func = hash_func if hash_func else hashlib.sha256()

    def write(self, data: bytes):
        """Update the digest with *data*, then return the wrapped ``write`` result."""
        digest, sink = self.hash_func, self.base_writer
        digest.update(data)
        return sink.write(data)

    def tell(self):
        """Return the position reported by the wrapped writer."""
        position = self.base_writer.tell()
        return position

    def close(self):
        """Close the wrapped writer."""
        self.base_writer.close()

from olot.oci.oci_config import OCIManifestConfig

def get_file_hash(path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 4096-byte reads, so
    it is never loaded into memory in one piece.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a b"" sentinel stops once read() signals end of file.
        for block in iter(lambda: stream.read(4096), b""):
            digest.update(block)
    return digest.hexdigest()
from olot.oci.oci_image_index import OCIImageIndex, read_ocilayout_root_index
from olot.oci.oci_image_manifest import OCIImageManifest, ContentDescriptor
from olot.oci.oci_image_layout import verify_ocilayout
from olot.oci.oci_common import MediaTypes

from olot.utils.files import HashingWriter, tar_filter_fn, tarball_from_file, targz_from_file
from olot.utils.types import compute_hash_of_str

def oci_layers_on_top(ocilayout: Path, model_files: List[os.PathLike], modelcard: typing.Union[os.PathLike, None] = None):
check_ocilayout(ocilayout)
verify_ocilayout(ocilayout)
ocilayout_root_index = read_ocilayout_root_index(ocilayout)
ocilayout_indexes: Dict[str, OCIImageIndex] = crawl_ocilayout_indexes(ocilayout, ocilayout_root_index)
ocilayout_manifests: Dict[str, OCIImageManifest] = crawl_ocilayout_manifests(ocilayout, ocilayout_indexes)
new_layers = {} # layer digest : diff_id

sha256_path = ocilayout / "blobs" / "sha256"
for model in model_files:
model = Path(model)
new_layer = tar_into_ocilayout(ocilayout, model)
new_layer = tarball_from_file(model, sha256_path)
new_layers[new_layer] = new_layer
if modelcard is not None:
modelcard_layer_diffid = targz_into_ocilayout(ocilayout, Path(modelcard))
modelcard_layer_diffid = targz_from_file(Path(modelcard), sha256_path)
new_layers[modelcard_layer_diffid[0]] = modelcard_layer_diffid[1]

new_ocilayout_manifests: Dict[str, str] = {}
for manifest_hash, manifest in ocilayout_manifests.items():
print(manifest_hash, manifest.mediaType)
Expand All @@ -62,7 +43,7 @@ def oci_layers_on_top(ocilayout: Path, model_files: List[os.PathLike], modelcard
mc = OCIManifestConfig.model_validate_json(cf.read())
for layer, diffid in new_layers.items():
size = os.stat(ocilayout / "blobs" / "sha256" / layer).st_size
mt = "application/vnd.oci.image.layer.v1.tar" if layer == diffid else "application/vnd.oci.image.layer.v1.tar+gzip"
mt = MediaTypes.layer if layer == diffid else MediaTypes.layer_gzip
la = None if layer == diffid else {"io.opendatahub.temp.layer.type":"modelcard"}
cd = ContentDescriptor(
mediaType=mt,
Expand Down Expand Up @@ -112,12 +93,12 @@ def oci_layers_on_top(ocilayout: Path, model_files: List[os.PathLike], modelcard
new_ocilayout_indexes[index_hash] = index_json_hash
pprint(new_ocilayout_indexes)
for entry in ocilayout_root_index.manifests:
if entry.mediaType == "application/vnd.oci.image.index.v1+json":
if entry.mediaType == MediaTypes.index:
lookup_new_hash = new_ocilayout_indexes[entry.digest.removeprefix("sha256:")]
print(f"old index {entry.digest} is now at {lookup_new_hash}")
entry.digest = "sha256:" + lookup_new_hash
entry.size = os.stat(ocilayout / "blobs" / "sha256" / lookup_new_hash).st_size
elif entry.mediaType == "application/vnd.oci.image.manifest.v1+json":
elif entry.mediaType == MediaTypes.manifest:
lookup_new_hash = new_ocilayout_manifests[entry.digest.removeprefix("sha256:")]
print(f"old index {entry.digest} is now at {lookup_new_hash}")
entry.digest = "sha256:" + lookup_new_hash
Expand All @@ -128,18 +109,13 @@ def oci_layers_on_top(ocilayout: Path, model_files: List[os.PathLike], modelcard
root_idx_f.write(ocilayout_root_index.model_dump_json(exclude_none=True))


def compute_hash_of_str(content: str) -> str:
    """Return the hex SHA-256 digest of *content*.

    The string is converted to bytes with ``str.encode()`` defaults
    (UTF-8) before hashing.
    """
    encoded = content.encode()
    return hashlib.sha256(encoded).hexdigest()


def crawl_ocilayout_manifests(ocilayout: Path, ocilayout_indexes: Dict[str, OCIImageIndex]) -> Dict[str, OCIImageManifest]:
ocilayout_manifests: Dict[str, OCIImageManifest] = {}
for _, mi in ocilayout_indexes.items():
for m in mi.manifests:
print(m)
if m.mediaType != "application/vnd.oci.image.manifest.v1+json":
if m.mediaType != MediaTypes.manifest:
raise ValueError("Did not expect something else than Image Manifest in a Index")
target_hash = m.digest.removeprefix("sha256:")
print(target_hash)
Expand All @@ -152,7 +128,7 @@ def crawl_ocilayout_manifests(ocilayout: Path, ocilayout_indexes: Dict[str, OCII
def crawl_ocilayout_indexes(ocilayout: Path, ocilayout_root_index: OCIImageIndex) -> Dict[str, OCIImageIndex] :
ocilayout_indexes: Dict[str, OCIImageIndex] = {}
for m in ocilayout_root_index.manifests:
if m.mediaType == "application/vnd.oci.image.index.v1+json":
if m.mediaType == MediaTypes.index:
target_hash = m.digest.removeprefix("sha256:")
index_path = ocilayout / "blobs" / "sha256" / target_hash
with open(index_path, "r") as ip:
Expand All @@ -163,16 +139,6 @@ def crawl_ocilayout_indexes(ocilayout: Path, ocilayout_root_index: OCIImageIndex
else:
click.echo(f"Found Image Manifest {m.digest} in root index, TODO assuming these are referred through the other indexes")
return ocilayout_indexes


def check_ocilayout(ocilayout: Path):
    """Validate the ``oci-layout`` marker file of an OCI image layout directory.

    Args:
        ocilayout: Directory expected to contain an ``oci-layout`` file.

    Returns:
        ``True`` when the file parses as ``OCIImageLayout`` and declares
        ``ImageLayoutVersion.field_1_0_0``.

    Raises:
        ValueError: If the declared layout version is anything else.
    """
    marker = ocilayout / "oci-layout"
    with open(marker, "r") as fh:
        layout = OCIImageLayout.model_validate_json(fh.read())
    # Guard clause: only the 1.0.0 layout version is accepted.
    if layout.imageLayoutVersion != ImageLayoutVersion.field_1_0_0:
        raise ValueError(f"Unexpected ocilayout in {ocilayout}")
    return True


def tar_into_ocilayout(ocilayout: Path, model: Path):
sha256_path = ocilayout / "blobs" / "sha256"
Expand Down Expand Up @@ -204,20 +170,5 @@ def targz_into_ocilayout(ocilayout: Path, model: Path):
os.rename(temp_tar_filename, sha256_path / final_tar_filename)
return (checksum, tar_checksum)


def tar_filter_fn(input: tarfile.TarInfo) -> tarfile.TarInfo:
    """Overwrite owner ids and permission bits on a tar member.

    Sets ``uid`` and ``gid`` to 0 and ``mode`` to ``0o664`` on the given
    ``TarInfo`` in place and returns that same object.
    """
    overrides = (("uid", 0), ("gid", 0), ("mode", 0o664))
    for attribute, value in overrides:
        setattr(input, attribute, value)
    return input


def read_ocilayout_root_index(ocilayout: Path) -> OCIImageIndex:
    """Load the root ``index.json`` of an OCI image layout directory.

    Args:
        ocilayout: Directory containing the ``index.json`` file.

    Returns:
        The file content validated into an ``OCIImageIndex``.
    """
    index_file = ocilayout / "index.json"
    with open(index_file, "r") as fh:
        return OCIImageIndex.model_validate_json(fh.read())


# Script entry point: executing this module directly only prints "?".
if __name__ == "__main__":
print("?")
31 changes: 12 additions & 19 deletions olot/oci/oci_common.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@

from typing import Annotated, Any, Dict, List
from typing import Annotated, List
from pydantic import AnyUrl, Field


# A media type string of the form "type/subtype". Per the pattern, each half
# is 1-127 characters: one ASCII alphanumeric followed by up to 126 characters
# drawn from alphanumerics and !#$&^_.+-
MediaType = Annotated[str, Field(
...,
pattern=r'^[A-Za-z0-9][A-Za-z0-9!#$&^_.+-]{0,126}/[A-Za-z0-9][A-Za-z0-9!#$&^_.+-]{0,126}$'
)]

class MediaTypes:
    """Media type strings for OCI image manifests, indexes and layers.

    Plain class attributes used as named constants in place of repeated
    string literals.
    See also: https://github.com/opencontainers/image-spec/blob/main/media-types.md
    """

    # JSON documents
    manifest: MediaType = "application/vnd.oci.image.manifest.v1+json"
    index: MediaType = "application/vnd.oci.image.index.v1+json"

    # Layer archives: plain tar and gzip-compressed tar
    layer: MediaType = "application/vnd.oci.image.layer.v1.tar"
    layer_gzip: MediaType = "application/vnd.oci.image.layer.v1.tar+gzip"


Digest = Annotated[str, Field(
...,
Expand All @@ -20,22 +31,4 @@
)]


# String that must contain at least one character matched by `.`.
NonEmptyString = Annotated[str, Field(..., pattern=r".{1,}")]


# Mapping from non-empty string keys to string values.
MapStringString = Annotated[Dict[NonEmptyString, str], Field(...)]


# Mapping from non-empty string keys to arbitrary values.
MapStringObject = Annotated[Dict[NonEmptyString, Any], Field(...)]


# Integer limited to the signed 8-bit range [-128, 127].
Int8 = Annotated[int, Field(ge=-128, le=127)]


# Integer with 64-bit-sized bounds. NOTE(review): the limits are rounded
# decimal values (+/-9223372036854776000), slightly wider than the exact
# int64 range [-2**63, 2**63 - 1] — presumably inherited from the schema
# these models were generated from; confirm before tightening.
Int64 = Annotated[int, Field(ge=-9223372036854776000, le=9223372036854776000)]


# String alias with no extra constraint declared here; the name suggests
# base64 content, but nothing in this definition validates that.
Base64 = Annotated[str, Field()]


# Annotation maps share the MapStringString shape (non-empty keys, str values).
Annotations = Annotated[MapStringString, Field()]
2 changes: 1 addition & 1 deletion olot/oci/oci_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from pydantic import BaseModel, Field

from .oci_common import MapStringString, MapStringObject
from olot.utils.types import MapStringString, MapStringObject


class Type(Enum):
Expand Down
13 changes: 11 additions & 2 deletions olot/oci/oci_image_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
from __future__ import annotations

from typing import Annotated, List, Optional
from pathlib import Path

from pydantic import BaseModel, Field

from .oci_common import MediaType, Digest, Urls, Int64, Base64, Annotations
from olot.oci.oci_common import MediaType, Digest, Urls
from olot.utils.types import Int64, Base64, Annotations

class Platform(BaseModel):
architecture: str
Expand Down Expand Up @@ -56,7 +58,7 @@ class Platform(BaseModel):
# __root__: Dict[constr(regex=r'.{1,}'), str]
# class MapStringString(RootModel[Dict[constr(pattern=r".{1,}"), str]]):
# """
# A Pydantic RootModel for a dictionary where keys are non-empty strings
# A Pydantic RootModel for a dictionary where keys are non-empty strings
# and values are strings.
# """
# root: Dict[constr(pattern=r".{1,}"), str] = Field(
Expand Down Expand Up @@ -190,3 +192,10 @@ class OCIImageIndex(BaseModel):
subject: Optional[ContentDescriptor] = None
manifests: List[Manifest]
annotations: Optional[Annotations] = None


def read_ocilayout_root_index(ocilayout: Path) -> OCIImageIndex:
    """Load the root ``index.json`` of an OCI image layout directory.

    Args:
        ocilayout: Directory containing the ``index.json`` file.

    Returns:
        The file content validated into an ``OCIImageIndex``.
    """
    index_file = ocilayout / "index.json"
    with open(index_file, "r") as fh:
        return OCIImageIndex.model_validate_json(fh.read())
9 changes: 9 additions & 0 deletions olot/oci/oci_image_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import annotations

from enum import Enum
from pathlib import Path

from pydantic import BaseModel, Field

Expand All @@ -17,3 +18,11 @@ class OCIImageLayout(BaseModel):
imageLayoutVersion: ImageLayoutVersion = Field(
..., description='version of the OCI Image Layout (in the oci-layout file)'
)

def verify_ocilayout(ocilayout: Path):
    """Validate the ``oci-layout`` marker file of an OCI image layout directory.

    Args:
        ocilayout: Directory expected to contain an ``oci-layout`` file.

    Returns:
        ``True`` when the file parses as ``OCIImageLayout`` and declares
        ``ImageLayoutVersion.field_1_0_0``.

    Raises:
        ValueError: If the declared layout version is anything else.
    """
    marker = ocilayout / "oci-layout"
    with open(marker, "r") as fh:
        layout = OCIImageLayout.model_validate_json(fh.read())
    # Guard clause: only the 1.0.0 layout version is accepted.
    if layout.imageLayoutVersion != ImageLayoutVersion.field_1_0_0:
        raise ValueError(f"Unexpected ocilayout in {ocilayout}")
    return True
3 changes: 2 additions & 1 deletion olot/oci/oci_image_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from pydantic import BaseModel, Field

from .oci_common import MediaType, Digest, Urls, Int64, Base64, Annotations
from olot.oci.oci_common import MediaType, Digest, Urls
from olot.utils.types import Int64, Base64, Annotations

# class MediaType(BaseModel):
# __root__: constr(
Expand Down
Loading

0 comments on commit c7ceea6

Please sign in to comment.