diff --git a/model_signing/serialization/serialize_by_file_shard.py b/model_signing/serialization/serialize_by_file_shard.py index 59aa07e8..c16c1fdd 100644 --- a/model_signing/serialization/serialize_by_file_shard.py +++ b/model_signing/serialization/serialize_by_file_shard.py @@ -14,10 +14,11 @@ """Model serializers that operated at file shard level of granularity.""" +import abc import base64 import concurrent.futures import pathlib -from typing import Callable, Iterable, TypeAlias +from typing import Callable, Iterable, cast from typing_extensions import override from model_signing.hashing import file @@ -27,21 +28,16 @@ from model_signing.serialization import serialize_by_file -_ShardSignTask: TypeAlias = tuple[pathlib.PurePath, str, int, int] - - def _build_header( *, - entry_name: str, - entry_type: str, + name: str, start: int, end: int, ) -> bytes: - """Builds a header to encode a path with given name and type. + """Builds a header to encode a path with given name and shard range. Args: entry_name: The name of the entry to build the header for. - entry_type: The type of the entry (file or directory). start: Offset for the start of the path shard. end: Offset for the end of the path shard. @@ -50,14 +46,11 @@ def _build_header( bytes. Each argument is separated by dots and the last byte is also a dot (so the file digest can be appended unambiguously). """ - # Note: This will get replaced in subsequent change, right now we're just - # moving existing code around. - encoded_type = entry_type.encode("utf-8") # Prevent confusion if name has a "." inside by encoding to base64. - encoded_name = base64.b64encode(entry_name.encode("utf-8")) + encoded_name = base64.b64encode(name.encode("utf-8")) encoded_range = f"{start}-{end}".encode("utf-8") # Note: empty string at the end, to terminate header with a "." 
- return b".".join([encoded_type, encoded_name, encoded_range, b""]) + return b".".join([encoded_name, encoded_range, b""]) def _endpoints(step: int, end: int) -> Iterable[int]: @@ -83,164 +76,15 @@ def _endpoints(step: int, end: int) -> Iterable[int]: yield end -class ShardedDFSSerializer(serialization.Serializer): - """DFSSerializer that uses a sharded hash engine to exploit parallelism.""" - - def __init__( - self, - file_hasher_factory: Callable[ - [pathlib.Path, int, int], file.ShardedFileHasher - ], - merge_hasher: hashing.StreamingHashEngine, - max_workers: int | None = None, - ): - """Initializes an instance to serialize a model with this serializer. - - Args: - hasher_factory: A callable to build the hash engine used to hash - every shard of the files in the model. Because each shard is - processed in parallel, every thread needs to call the factory to - start hashing. The arguments are the file, and the endpoints of - the shard. - merge_hasher: A `hashing.StreamingHashEngine` instance used to merge - individual file digests to compute an aggregate digest. - max_workers: Maximum number of workers to use in parallel. Default - is to defer to the `concurent.futures` library. - """ - self._file_hasher_factory = file_hasher_factory - self._merge_hasher = merge_hasher - self._max_workers = max_workers - - # Precompute some private values only once by using a mock file hasher. - # None of the arguments used to build the hasher are used. - hasher = file_hasher_factory(pathlib.Path(), 0, 1) - self._shard_size = hasher.shard_size - - @override - def serialize(self, model_path: pathlib.Path) -> manifest.DigestManifest: - # Note: This function currently uses `pathlib.Path.glob` so the DFS - # expansion relies on the `glob` implementation performing a DFS. We - # will be truthful again when switching to `pathlib.Path.walk`, after - # Python 3.12 is the minimum version we support. 
- - # TODO: github.com/sigstore/model-transparency/issues/196 - Add checks - # to exclude symlinks if desired. - serialize_by_file.check_file_or_directory(model_path) - - if model_path.is_file(): - entries = [model_path] - else: - # TODO: github.com/sigstore/model-transparency/issues/200 - When - # Python3.12 is the minimum supported version, this can be replaced - # with `pathlib.Path.walk` for a clearer interface, and some speed - # improvement. - entries = sorted(model_path.glob("**/*")) - - tasks = self._convert_paths_to_tasks(entries, model_path) - - digest_len = self._merge_hasher.digest_size - digests_buffer = bytearray(len(tasks) * digest_len) - - with concurrent.futures.ThreadPoolExecutor( - max_workers=self._max_workers - ) as tpe: - futures_dict = { - tpe.submit(self._perform_hash_task, model_path, task): i - for i, task in enumerate(tasks) - } - for future in concurrent.futures.as_completed(futures_dict): - i = futures_dict[future] - task_digest = future.result() - - task_path, task_type, task_start, task_end = tasks[i] - header = _build_header( - entry_name=task_path.name, - entry_type=task_type, - start=task_start, - end=task_end, - ) - self._merge_hasher.reset(header) - self._merge_hasher.update(task_digest) - digest = self._merge_hasher.compute().digest_value - - start = i * digest_len - end = start + digest_len - digests_buffer[start:end] = digest - - self._merge_hasher.reset(digests_buffer) - return manifest.DigestManifest(self._merge_hasher.compute()) - - def _convert_paths_to_tasks( - self, paths: Iterable[pathlib.Path], root_path: pathlib.Path - ) -> list[_ShardSignTask]: - """Returns the tasks that would hash shards of files in parallel. - - Every file in `paths` is replaced by a set of tasks. Each task computes - the digest over a shard of the file. Directories result in a single - task, just to compute a digest over a header. 
- - To differentiate between (empty) files and directories with the same - name, every task needs to also include a header. The header needs to - include relative path to the model root, as we want to obtain the same - digest if the model is moved. - - We don't construct an enum for the type of the entry, because these will - never escape this class. - - Note that the path component of the tasks is a `pathlib.PurePath`, so - operations on it cannot touch the filesystem. - """ - # TODO: github.com/sigstore/model-transparency/issues/196 - Add support - # for excluded files. - - tasks = [] - for path in paths: - serialize_by_file.check_file_or_directory(path) - relative_path = path.relative_to(root_path) - - if path.is_file(): - path_size = path.stat().st_size - start = 0 - for end in _endpoints(self._shard_size, path_size): - tasks.append((relative_path, "file", start, end)) - start = end - else: - tasks.append((relative_path, "dir", 0, 0)) - - return tasks - - def _perform_hash_task( - self, model_path: pathlib.Path, task: _ShardSignTask - ) -> bytes: - """Produces the hash of the file shard included in `task`.""" - task_path, task_type, task_start, task_end = task - - # TODO: github.com/sigstore/model-transparency/issues/197 - Directories - # don't need to use the file hasher. Rather than starting a process - # just for them, we should filter these ahead of time, and only use - # threading for file shards. For now, just return an empty result. - if task_type == "dir": - return b"" - - # TODO: github.com/sigstore/model-transparency/issues/197 - Similarly, - # empty files should be hashed outside of a parallel task, to not waste - # resources. 
- if task_start == task_end: - return b"" - - full_path = model_path.joinpath(task_path) - hasher = self._file_hasher_factory(full_path, task_start, task_end) - return hasher.compute().digest_value - - class ShardedFilesSerializer(serialization.Serializer): - """Model serializers that produces an itemized manifest, at shard level. + """Generic file shard serializer. Traverses the model directory and creates digests for every file found, sharding the file in equal shards and computing the digests in parallel. - Since the manifest lists each item individually, this will also enable - support for incremental updates (to be added later). + Subclasses can then create a manifest with these digests, either listing + them item by item, combining them into file digests, or combining all of + them into a single digest. """ def __init__( @@ -270,9 +114,7 @@ def __init__( self._shard_size = hasher.shard_size @override - def serialize( - self, model_path: pathlib.Path - ) -> manifest.ShardLevelManifest: + def serialize(self, model_path: pathlib.Path) -> manifest.Manifest: # TODO: github.com/sigstore/model-transparency/issues/196 - Add checks # to exclude symlinks if desired. serialize_by_file.check_file_or_directory(model_path) @@ -337,12 +179,96 @@ def _compute_hash( path=relative_path, digest=digest, start=start, end=end ) + @abc.abstractmethod def _build_manifest( self, items: Iterable[manifest.ShardedFileManifestItem] - ) -> manifest.ShardLevelManifest: + ) -> manifest.Manifest: """Builds an itemized manifest from a given list of items. Every subclass needs to implement this method to determine the format of the manifest. """ + pass + + +class ManifestSerializer(ShardedFilesSerializer): + """Model serializer that produces an itemized manifest, at shard level. + + Since the manifest lists each item individually, this will also enable + support for incremental updates (to be added later). 
+ """ + + @override + def serialize( + self, model_path: pathlib.Path + ) -> manifest.ShardLevelManifest: + """Serializes the model given by the `model_path` argument. + + The only reason for the override is to change the return type, to be + more restrictive. This is to signal that the only manifests that can be + returned are `manifest.ShardLevelManifest` instances. + """ + return cast(manifest.ShardLevelManifest, super().serialize(model_path)) + + @override + def _build_manifest( + self, items: Iterable[manifest.ShardedFileManifestItem] + ) -> manifest.ShardLevelManifest: return manifest.ShardLevelManifest(items) + + +class DigestSerializer(ShardedFilesSerializer): + """Serializer for a model that performs a traversal of the model directory. + + This serializer produces a single hash for the entire model. + """ + + def __init__( + self, + file_hasher_factory: Callable[ + [pathlib.Path, int, int], file.ShardedFileHasher + ], + merge_hasher: hashing.StreamingHashEngine, + max_workers: int | None = None, + ): + """Initializes an instance to serialize a model with this serializer. + + Args: + hasher_factory: A callable to build the hash engine used to hash + every shard of the files in the model. Because each shard is + processed in parallel, every thread needs to call the factory to + start hashing. The arguments are the file, and the endpoints of + the shard. + merge_hasher: A `hashing.StreamingHashEngine` instance used to merge + individual file shard digests to compute an aggregate digest. + max_workers: Maximum number of workers to use in parallel. Default + is to defer to the `concurrent.futures` library. + """ + super().__init__(file_hasher_factory, max_workers) + self._merge_hasher = merge_hasher + + @override + def serialize(self, model_path: pathlib.Path) -> manifest.DigestManifest: + """Serializes the model given by the `model_path` argument. + + The only reason for the override is to change the return type, to be + more restrictive. 
This is to signal that the only manifests that can be + returned are `manifest.DigestManifest` instances. + """ + return cast(manifest.DigestManifest, super().serialize(model_path)) + + @override + def _build_manifest( + self, items: Iterable[manifest.ShardedFileManifestItem] + ) -> manifest.DigestManifest: + self._merge_hasher.reset() + + for item in sorted(items, key=lambda i: (i.path, i.start, i.end)): + header = _build_header( + name=item.path.name, start=item.start, end=item.end + ) + self._merge_hasher.update(header) + self._merge_hasher.update(item.digest.digest_value) + + digest = self._merge_hasher.compute() + return manifest.DigestManifest(digest) diff --git a/model_signing/serialization/serialize_by_file_shard_test.py b/model_signing/serialization/serialize_by_file_shard_test.py index 3e554452..a7a5e5ba 100644 --- a/model_signing/serialization/serialize_by_file_shard_test.py +++ b/model_signing/serialization/serialize_by_file_shard_test.py @@ -56,7 +56,7 @@ def test_known_models(self, request, model_fixture_name): model = request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(model) @@ -82,7 +82,7 @@ def test_known_models_small_shards(self, request, model_fixture_name): model = request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory_small_shards, memory.SHA256() ) manifest = serializer.serialize(model) @@ -98,7 +98,7 @@ def test_known_models_small_shards(self, request, model_fixture_name): assert manifest.digest.digest_hex == expected_digest def test_file_hash_is_not_same_as_hash_of_content(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = 
serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) @@ -108,7 +108,7 @@ def test_file_hash_is_not_same_as_hash_of_content(self, sample_model_file): assert manifest.digest.digest_hex != digest.digest_hex def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_file) @@ -122,7 +122,7 @@ def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): def test_file_model_hash_changes_if_content_changes( self, sample_model_file ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_file) @@ -134,7 +134,7 @@ def test_file_model_hash_changes_if_content_changes( assert manifest.digest.digest_value != new_manifest.digest.digest_value def test_directory_model_with_only_known_file(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest_file = serializer.serialize(sample_model_file) @@ -148,7 +148,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file): def test_folder_model_hash_is_same_if_model_is_moved( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -159,8 +159,8 @@ def test_folder_model_hash_is_same_if_model_is_moved( assert manifest == new_manifest - def test_folder_model_empty_folder_gets_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + def 
test_folder_model_empty_folder_not_included(self, sample_model_folder): + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -170,10 +170,10 @@ def test_folder_model_empty_folder_gets_included(self, sample_model_folder): new_empty_dir.mkdir() new_manifest = serializer.serialize(sample_model_folder) - assert manifest != new_manifest + assert manifest == new_manifest - def test_folder_model_empty_file_gets_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + def test_folder_model_empty_file_not_included(self, sample_model_folder): + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -183,10 +183,10 @@ def test_folder_model_empty_file_gets_included(self, sample_model_folder): new_empty_file.write_text("") new_manifest = serializer.serialize(sample_model_folder) - assert manifest != new_manifest + assert manifest == new_manifest def test_folder_model_rename_file(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -200,7 +200,7 @@ def test_folder_model_rename_file(self, sample_model_folder): assert manifest != new_manifest def test_folder_model_rename_dir(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -213,7 +213,7 @@ def test_folder_model_rename_dir(self, sample_model_folder): assert manifest != new_manifest def test_folder_model_replace_file_empty_folder(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + 
serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -227,7 +227,7 @@ def test_folder_model_replace_file_empty_folder(self, sample_model_folder): assert manifest != new_manifest def test_folder_model_change_file(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -239,22 +239,22 @@ def test_folder_model_change_file(self, sample_model_folder): assert manifest != new_manifest - def test_empty_folder_hashes_differently_than_empty_file( + def test_empty_folder_hashes_same_as_empty_file( self, empty_model_file, empty_model_folder ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) folder_manifest = serializer.serialize(empty_model_folder) file_manifest = serializer.serialize(empty_model_file) - assert folder_manifest != file_manifest + assert folder_manifest == file_manifest - def test_model_with_empty_folder_hashes_differently_than_with_empty_file( + def test_model_with_empty_folder_hashes_same_as_with_empty_file( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) @@ -270,15 +270,15 @@ def test_model_with_empty_folder_hashes_differently_than_with_empty_file( new_empty_file.write_text("") file_manifest = serializer.serialize(sample_model_folder) - assert folder_manifest != file_manifest + assert folder_manifest == file_manifest def test_max_workers_does_not_change_digest(self, sample_model_folder): - serializer1 = serialize_by_file_shard.ShardedDFSSerializer( + serializer1 = serialize_by_file_shard.DigestSerializer( self._hasher_factory, 
memory.SHA256() ) manifest1 = serializer1.serialize(sample_model_folder) - serializer2 = serialize_by_file_shard.ShardedDFSSerializer( + serializer2 = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256(), max_workers=2 ) manifest2 = serializer2.serialize(sample_model_folder) @@ -286,12 +286,12 @@ def test_max_workers_does_not_change_digest(self, sample_model_folder): assert manifest1 == manifest2 def test_shard_size_changes_digests(self, sample_model_folder): - serializer1 = serialize_by_file_shard.ShardedDFSSerializer( + serializer1 = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest1 = serializer1.serialize(sample_model_folder) - serializer2 = serialize_by_file_shard.ShardedDFSSerializer( + serializer2 = serialize_by_file_shard.DigestSerializer( self._hasher_factory_small_shards, memory.SHA256() ) manifest2 = serializer2.serialize(sample_model_folder) @@ -365,7 +365,7 @@ def test_known_models(self, request, model_fixture_name): model = request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(model) @@ -398,7 +398,7 @@ def test_known_models_small_shards(self, request, model_fixture_name): model = request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory_small_shards ) manifest = serializer.serialize(model) @@ -420,7 +420,7 @@ def test_known_models_small_shards(self, request, model_fixture_name): assert items == found_items def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest 
= serializer.serialize(sample_model_file) @@ -432,7 +432,7 @@ def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): assert manifest == new_manifest def test_file_manifest_changes_if_content_changes(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_file) @@ -449,7 +449,7 @@ def test_file_manifest_changes_if_content_changes(self, sample_model_file): assert digests != new_digests def test_directory_model_with_only_known_file(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest_file = serializer.serialize(sample_model_file) @@ -466,7 +466,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file): def test_folder_model_hash_is_same_if_model_is_moved( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -478,7 +478,7 @@ def test_folder_model_hash_is_same_if_model_is_moved( assert manifest == new_manifest def test_folder_model_empty_folder_not_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -491,7 +491,7 @@ def test_folder_model_empty_folder_not_included(self, sample_model_folder): assert manifest == new_manifest def test_folder_model_empty_file_not_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ 
-529,7 +529,7 @@ def _check_manifests_match_except_on_renamed_file( def test_folder_model_rename_file_only_changes_path_part( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -577,7 +577,7 @@ def _check_manifests_match_except_on_renamed_dir( def test_folder_model_rename_dir_only_changes_path_part( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -593,7 +593,7 @@ def test_folder_model_rename_dir_only_changes_path_part( ) def test_folder_model_replace_file_empty_folder(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -632,7 +632,7 @@ def _check_manifests_match_except_on_entry( assert old_manifest._item_to_digest[shard] == digest def test_folder_model_change_file(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -649,13 +649,13 @@ def test_folder_model_change_file(self, sample_model_folder): ) def test_max_workers_does_not_change_digest(self, sample_model_folder): - serializer1 = serialize_by_file_shard.ShardedFilesSerializer( + serializer1 = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) - serializer2 = serialize_by_file_shard.ShardedFilesSerializer( + serializer2 = serialize_by_file_shard.ManifestSerializer( self._hasher_factory, max_workers=1 ) - serializer3 = serialize_by_file_shard.ShardedFilesSerializer( + serializer3 = 
serialize_by_file_shard.ManifestSerializer( self._hasher_factory, max_workers=3 ) diff --git a/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/deep_model_folder b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/deep_model_folder new file mode 100644 index 00000000..fb544575 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/deep_model_folder @@ -0,0 +1 @@ +36eed9389ebbbe15ac15d33c81dabb60ccb7c945ff641d78f59db9aa9dc47ac9 diff --git a/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/empty_model_file b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/empty_model_file new file mode 100644 index 00000000..c3068040 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/empty_model_file @@ -0,0 +1 @@ +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/empty_model_folder b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/empty_model_folder new file mode 100644 index 00000000..c3068040 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/empty_model_folder @@ -0,0 +1 @@ +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/model_folder_with_empty_file b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/model_folder_with_empty_file new file mode 100644 index 00000000..1a4bcbef --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/model_folder_with_empty_file @@ -0,0 +1 @@ +68efd863f20e083173846a5e98ad11387a1979efe20ded426a7930bab8358a9c diff --git a/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/sample_model_file 
b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/sample_model_file new file mode 100644 index 00000000..2ebc572a --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/sample_model_file @@ -0,0 +1 @@ +3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b diff --git a/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/sample_model_folder b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/sample_model_folder new file mode 100644 index 00000000..ed0f9979 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestDFSSerializer/sample_model_folder @@ -0,0 +1 @@ +310af4fc4c52bf63cd1687c67076ed3e56bc5480a1b151539e6c550506ae0301 diff --git a/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/deep_model_folder b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/deep_model_folder new file mode 100644 index 00000000..44d8daa4 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/deep_model_folder @@ -0,0 +1,4 @@ +d0/d1/d2/d3/d4/f0:6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff +d0/d1/d2/d3/d4/f1:a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2 +d0/d1/d2/d3/d4/f2:5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0 +d0/d1/d2/d3/d4/f3:eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008 diff --git a/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/empty_model_file b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/empty_model_file new file mode 100644 index 00000000..aff969ce --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/empty_model_file @@ -0,0 +1 @@ +.:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git 
a/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/empty_model_folder b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/empty_model_folder new file mode 100644 index 00000000..e69de29b diff --git a/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/model_folder_with_empty_file b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/model_folder_with_empty_file new file mode 100644 index 00000000..2f7997e9 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/model_folder_with_empty_file @@ -0,0 +1 @@ +empty_file:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/sample_model_file b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/sample_model_file new file mode 100644 index 00000000..96d0bc82 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/sample_model_file @@ -0,0 +1 @@ +.:3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b diff --git a/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/sample_model_folder b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/sample_model_folder new file mode 100644 index 00000000..4328868e --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file/TestFilesSerializer/sample_model_folder @@ -0,0 +1,10 @@ +d0/f00:fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249 +d0/f01:e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6 +d0/f02:407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426 +d1/f10:6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f +d1/f11:a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf +d1/f12:8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3 
+f0:997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc +f1:c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae +f2:700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719 +f3:912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder index 528ab87c..b3a94824 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder @@ -1 +1 @@ -52fa3c459aec58bc5f9702c73cb3c6b8fd19e9342aa3e4db851e1bde69ab1727 +6deb22c4330a8a9eb5a2d5faa73bf56c64a5c2888961f0f0df51912798fc4954 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards index a4f2f81e..f826b95f 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards @@ -1 +1 @@ -abd66cd0d8a01f3f552ac5af717f49dc6e6575f0849ec3bfb3c9051962314ce6 +f5203504bea9ec90a7b7453a53c0aaab98a5db5d038dc1fac3613b47f6018959 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file index 9ac3ea65..c3068040 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file +++ 
b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file @@ -1 +1 @@ -5f2d126b0d3540c17481fdf724e31cf03b4436a2ebabaa1d2e94fe09831be64d +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards index 9ac3ea65..c3068040 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards @@ -1 +1 @@ -5f2d126b0d3540c17481fdf724e31cf03b4436a2ebabaa1d2e94fe09831be64d +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file index b6d24eaf..c3068040 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file @@ -1 +1 @@ -230d217d5f4f388f5087ac4174dbc9b0ff358e3122a1267b0a56669a44f11ea1 +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards index b6d24eaf..c3068040 100644 --- 
a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards @@ -1 +1 @@ -230d217d5f4f388f5087ac4174dbc9b0ff358e3122a1267b0a56669a44f11ea1 +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file index a94a0fa0..8ec1d11f 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file @@ -1 +1 @@ -2ca48c47d5311a9b2f9305519cd5f927dcef09404fc32ef7886abe8f11450eff +14aebf2e466ad30ef59ea6fce67de44dc133c673784bd543b45f75b8efc3d821 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards index 5b6697c8..7b4ad705 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards @@ -1 +1 @@ -284b613e2e1576d87e5e1c912c82da8d87b6350276f36940516404b2a35f1a74 +beb3cbbd9d73133e85a102a3cbda2ef1dc2bc61e9323e32e576e4adb0571bf86 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder index 7fa49a73..c94ba5d0 100644 --- 
a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder @@ -1 +1 @@ -d22e0441cfa5ac2bc09715ddd88c802a7f97e29c93dc50f5498bab2954958ebb +865a7da87d90b261ce99086bfc61986a6230e6914ad885912b4d22464a9fda13 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards index 161cafdf..6e6fd67f 100644 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards @@ -1 +1 @@ -82bb608d88cf741730c5bcb75a7630f560643acafdd8fa02ad24be20f51c1250 +02be357fc0015ab3d15dbbd363a172f35d2cbd1a854b8e0a6c67fad2e2c3390f