diff --git a/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py b/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py index 05fd1405a3f..01b7fdd69c3 100644 --- a/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py +++ b/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py @@ -415,7 +415,10 @@ class name in this argument. experiment.spec.max_failed_trial_count = max_failed_trial_count # If users choose to use a custom objective function. - if objective is not None: + if objective is not None or parameters is not None: + if objective is None or base_image is None or parameters is None: + raise ValueError("One of the required parameters is None") + # Add metrics collector to the Katib Experiment. # Up to now, we only support parameter `kind`, of which default value # is `StdOut`, to specify the kind of metrics collector. @@ -504,9 +507,9 @@ class name in this argument. # If users choose to use external models and datasets. else: if ( - not model_provider_parameters - or not dataset_provider_parameters - or not trainer_parameters + model_provider_parameters is None + or dataset_provider_parameters is None + or trainer_parameters is None ): raise ValueError("One of the required parameters is None") @@ -518,6 +521,7 @@ class name in this argument. from kubeflow.storage_initializer.hugging_face import ( HuggingFaceDatasetParams, HuggingFaceModelParams, + HuggingFaceTrainerParams, ) from kubeflow.storage_initializer.s3 import S3DatasetParams from kubeflow.training import models as training_models @@ -567,7 +571,7 @@ class name in this argument. ) except Exception as e: pvc_list = self.core_api.list_namespaced_persistent_volume_claim( - namespace + namespace=namespace ) # Check if the PVC with the specified name exists. for pvc in pvc_list.items: @@ -596,6 +600,11 @@ class name in this argument. "or HuggingFaceDatasetParams." ) + if not isinstance(trainer_parameters, HuggingFaceTrainerParams): + raise ValueError( + "Trainer parameters must be an instance of HuggingFaceTrainerParams." + ) + # Iterate over input parameters and do substitutions. experiment_params = [] trial_params = [] @@ -633,6 +642,8 @@ class name in this argument. model_provider_parameters.model_uri, "--transformer_type", model_provider_parameters.transformer_type.__name__, + "--num_labels", + str(model_provider_parameters.num_labels), "--model_dir", VOLUME_PATH_MODEL, "--dataset_dir", @@ -643,7 +654,11 @@ class name in this argument. f"'{training_args}'", ], volume_mounts=[STORAGE_INITIALIZER_VOLUME_MOUNT], - resources=resources_per_trial.resources_per_worker, + resources=( + resources_per_trial.resources_per_worker + if resources_per_trial + else None + ), ) # Create the worker and the master pod. @@ -677,7 +692,10 @@ class name in this argument. ), ) - if resources_per_trial.num_procs_per_worker: + if ( + resources_per_trial is not None + and resources_per_trial.num_procs_per_worker + ): pytorchjob.spec.nproc_per_node = str( resources_per_trial.num_procs_per_worker ) @@ -689,7 +707,7 @@ class name in this argument. ) ) - if resources_per_trial.num_workers > 1: + if resources_per_trial is not None and resources_per_trial.num_workers > 1: pytorchjob.spec.pytorch_replica_specs["Worker"] = ( training_models.KubeflowOrgV1ReplicaSpec( replicas=resources_per_trial.num_workers - 1, diff --git a/sdk/python/v1beta1/kubeflow/katib/api/katib_client_test.py b/sdk/python/v1beta1/kubeflow/katib/api/katib_client_test.py index fef18adfa0f..229a1af3b4f 100644 --- a/sdk/python/v1beta1/kubeflow/katib/api/katib_client_test.py +++ b/sdk/python/v1beta1/kubeflow/katib/api/katib_client_test.py @@ -2,8 +2,10 @@ from typing import List, Optional from unittest.mock import Mock, patch +import kubeflow.katib as katib import kubeflow.katib.katib_api_pb2 as katib_api_pb2 import pytest +import transformers from kubeflow.katib import ( KatibClient, V1beta1AlgorithmSpec, @@ -16,8 +18,15 @@ V1beta1TrialTemplate, ) from kubeflow.katib.constants import constants +from kubeflow.storage_initializer.hugging_face import ( + HuggingFaceDatasetParams, + HuggingFaceModelParams, + HuggingFaceTrainerParams, +) from kubernetes.client import V1ObjectMeta +PVC_FAILED = "pvc creation failed" + TEST_RESULT_SUCCESS = "success" @@ -57,6 +66,27 @@ def get_observation_log_response(*args, **kwargs): ) +def create_namespaced_persistent_volume_claim_response(*args, **kwargs): + if kwargs.get("namespace") == PVC_FAILED: + raise Exception("PVC creation failed") + else: + return {"metadata": {"name": "tune_test"}} + + +def list_namespaced_persistent_volume_claim_response(*args, **kwargs): + if kwargs.get("namespace") == PVC_FAILED: + mock_pvc = Mock() + mock_pvc.metadata.name = "pvc_failed" + mock_list = Mock() + mock_list.items = [mock_pvc] + else: + mock_pvc = Mock() + mock_pvc.metadata.name = "tune_test" + mock_list = Mock() + mock_list.items = [mock_pvc] + return mock_list + + def generate_trial_template() -> V1beta1TrialTemplate: trial_spec = { "apiVersion": "batch/v1", @@ -270,6 +300,212 @@ def create_experiment( ] +test_tune_data = [ + ( + "missing name", + { + "name": None, + "objective": lambda x: x, + "parameters": {"a": katib.search.int(min=10, max=100)}, + }, + ValueError, + ), + ( + "invalid hybrid parameters - objective and model_provider_parameters", + { + "name": "tune_test", + "objective": lambda x: x, + "model_provider_parameters": HuggingFaceModelParams( + model_uri="hf://google-bert/bert-base-cased", + transformer_type=transformers.AutoModelForSequenceClassification, + num_labels=5, + ), + }, + ValueError, + ), + ( + "missing parameters - no custom objective or external model tuning", + { + "name": "tune_test", + }, + ValueError, + ), + ( + "missing parameters in custom objective tuning - lack parameters", + { + "name": "tune_test", + "objective": lambda x: x, + }, + ValueError, + ), + ( + "missing parameters in custom objective tuning - lack objective", + { + "name": "tune_test", + "parameters": {"a": katib.search.int(min=10, max=100)}, + }, + ValueError, + ), + ( + "missing parameters in external model tuning - lack dataset_provider_parameters " + "and trainer_parameters", + { + "name": "tune_test", + "model_provider_parameters": HuggingFaceModelParams( + model_uri="hf://google-bert/bert-base-cased", + transformer_type=transformers.AutoModelForSequenceClassification, + num_labels=5, + ), + }, + ValueError, + ), + ( + "missing parameters in external model tuning - lack model_provider_parameters " + "and trainer_parameters", + { + "name": "tune_test", + "dataset_provider_parameters": HuggingFaceDatasetParams( + repo_id="yelp_review_full", + split="train[:3000]", + ), + }, + ValueError, + ), + ( + "missing parameters in external model tuning - lack model_provider_parameters " + "and dataset_provider_parameters", + { + "name": "tune_test", + "trainer_parameters": HuggingFaceTrainerParams( + training_parameters=transformers.TrainingArguments( + output_dir="test_tune_api", + learning_rate=katib.search.double(min=1e-05, max=5e-05), + ), + ), + }, + ValueError, + ), + ( + "invalid env_per_trial", + { + "name": "tune_test", + "objective": lambda x: x, + "parameters": {"a": katib.search.int(min=10, max=100)}, + "env_per_trial": "invalid", + }, + ValueError, + ), + ( + "invalid model_provider_parameters", + { + "name": "tune_test", + "model_provider_parameters": "invalid", + "dataset_provider_parameters": HuggingFaceDatasetParams( + repo_id="yelp_review_full", + split="train[:3000]", + ), + "trainer_parameters": HuggingFaceTrainerParams( + training_parameters=transformers.TrainingArguments( + output_dir="test_tune_api", + learning_rate=katib.search.double(min=1e-05, max=5e-05), + ), + ), + }, + ValueError, + ), + ( + "invalid dataset_provider_parameters", + { + "name": "tune_test", + "model_provider_parameters": HuggingFaceModelParams( + model_uri="hf://google-bert/bert-base-cased", + transformer_type=transformers.AutoModelForSequenceClassification, + num_labels=5, + ), + "dataset_provider_parameters": "invalid", + "trainer_parameters": HuggingFaceTrainerParams( + training_parameters=transformers.TrainingArguments( + output_dir="test_tune_api", + learning_rate=katib.search.double(min=1e-05, max=5e-05), + ), + ), + }, + ValueError, + ), + ( + "invalid trainer_parameters", + { + "name": "tune_test", + "model_provider_parameters": HuggingFaceModelParams( + model_uri="hf://google-bert/bert-base-cased", + transformer_type=transformers.AutoModelForSequenceClassification, + num_labels=5, + ), + "dataset_provider_parameters": HuggingFaceDatasetParams( + repo_id="yelp_review_full", + split="train[:3000]", + ), + "trainer_parameters": "invalid", + }, + ValueError, + ), + ( + "pvc creation failed", + { + "name": "tune_test", + "namespace": PVC_FAILED, + "model_provider_parameters": HuggingFaceModelParams( + model_uri="hf://google-bert/bert-base-cased", + transformer_type=transformers.AutoModelForSequenceClassification, + num_labels=5, + ), + "dataset_provider_parameters": HuggingFaceDatasetParams( + repo_id="yelp_review_full", + split="train[:3000]", + ), + "trainer_parameters": HuggingFaceTrainerParams( + training_parameters=transformers.TrainingArguments( + output_dir="test_tune_api", + learning_rate=katib.search.double(min=1e-05, max=5e-05), + ), + ), + }, + RuntimeError, + ), + ( + "valid flow with custom objective tuning", + { + "name": "tune_test", + "objective": lambda x: x, + "parameters": {"a": katib.search.int(min=10, max=100)}, + }, + TEST_RESULT_SUCCESS, + ), + ( + "valid flow with external model tuning", + { + "name": "tune_test", + "model_provider_parameters": HuggingFaceModelParams( + model_uri="hf://google-bert/bert-base-cased", + transformer_type=transformers.AutoModelForSequenceClassification, + num_labels=5, + ), + "dataset_provider_parameters": HuggingFaceDatasetParams( + repo_id="yelp_review_full", + split="train[:3000]", + ), + "trainer_parameters": HuggingFaceTrainerParams( + training_parameters=transformers.TrainingArguments( + output_dir="test_tune_api", + learning_rate=katib.search.double(min=1e-05, max=5e-05), + ), + ), + }, + TEST_RESULT_SUCCESS, + ), +] + + @pytest.fixture def katib_client(): with patch( @@ -284,6 +520,16 @@ def katib_client(): return_value=Mock( GetObservationLog=Mock(side_effect=get_observation_log_response) ), + ), patch( + "kubernetes.client.CoreV1Api", + return_value=Mock( + create_namespaced_persistent_volume_claim=Mock( + side_effect=create_namespaced_persistent_volume_claim_response + ), + list_namespaced_persistent_volume_claim=Mock( + side_effect=list_namespaced_persistent_volume_claim_response + ), + ), ): client = KatibClient() yield client @@ -320,3 +566,78 @@ def test_get_trial_metrics(katib_client, test_name, kwargs, expected_output): except Exception as e: assert type(e) is expected_output print("test execution complete") + + +@pytest.mark.parametrize("test_name,kwargs,expected_output", test_tune_data) +def test_tune(katib_client, test_name, kwargs, expected_output): + """ + test tune function of katib client + """ + print("\n\nExecuting test:", test_name) + + with patch.object( + katib_client, "create_experiment", return_value=Mock() + ) as mock_create_experiment: + try: + katib_client.tune(**kwargs) + mock_create_experiment.assert_called_once() + + if expected_output == TEST_RESULT_SUCCESS: + assert expected_output == TEST_RESULT_SUCCESS + call_args = mock_create_experiment.call_args + experiment = call_args[0][0] + + if test_name == "valid flow with custom objective tuning": + # Verify input_params + args_content = "".join( + experiment.spec.trial_template.trial_spec.spec.template.spec.containers[ + 0 + ].args + ) + assert "'a': '${trialParameters.a}'" in args_content + # Verify trial_params + assert experiment.spec.trial_template.trial_parameters == [ + V1beta1TrialParameterSpec(name="a", reference="a"), + ] + # Verify experiment_params + assert experiment.spec.parameters == [ + V1beta1ParameterSpec( + name="a", + parameter_type="int", + feasible_space=V1beta1FeasibleSpace(min="10", max="100"), + ), + ] + + elif test_name == "valid flow with external model tuning": + # Verify input_params + args_content = "".join( + experiment.spec.trial_template.trial_spec.spec.pytorch_replica_specs[ + "Master" + ] + .template.spec.containers[0] + .args + ) + assert ( + '"learning_rate": "${trialParameters.learning_rate}"' + in args_content + ) + # Verify trial_params + assert experiment.spec.trial_template.trial_parameters == [ + V1beta1TrialParameterSpec( + name="learning_rate", reference="learning_rate" + ), + ] + # Verify experiment_params + assert experiment.spec.parameters == [ + V1beta1ParameterSpec( + name="learning_rate", + parameter_type="double", + feasible_space=V1beta1FeasibleSpace( + min="1e-05", max="5e-05" + ), + ), + ] + + except Exception as e: + assert type(e) is expected_output + print("test execution complete") diff --git a/sdk/python/v1beta1/setup.py b/sdk/python/v1beta1/setup.py index 78ae02aa739..ae7e1365363 100644 --- a/sdk/python/v1beta1/setup.py +++ b/sdk/python/v1beta1/setup.py @@ -86,6 +86,6 @@ ], install_requires=REQUIRES, extras_require={ - "huggingface": ["kubeflow-training[huggingface]==1.8.0"], + "huggingface": ["kubeflow-training[huggingface]==1.8.1"], }, ) diff --git a/test/unit/v1beta1/requirements.txt b/test/unit/v1beta1/requirements.txt index 2aa91b337e3..74402202c17 100644 --- a/test/unit/v1beta1/requirements.txt +++ b/test/unit/v1beta1/requirements.txt @@ -1,2 +1,3 @@ grpcio-testing==1.41.1 pytest==7.2.0 +kubeflow-training[huggingface]==1.8.1