From 37b5beac964ad7f4abf66e2be6616075b56e58ae Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 21 Mar 2024 18:08:01 -0400 Subject: [PATCH 01/10] fix: Adjusted default model params --- presets/inference/text-generation/inference_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/presets/inference/text-generation/inference_api.py b/presets/inference/text-generation/inference_api.py index bf739844d..f6c604a54 100644 --- a/presets/inference/text-generation/inference_api.py +++ b/presets/inference/text-generation/inference_api.py @@ -125,11 +125,11 @@ def health_check(): class GenerateKwargs(BaseModel): max_length: int = 200 # Length of input prompt+max_new_tokens min_length: int = 0 - do_sample: bool = False + do_sample: bool = True early_stopping: bool = False num_beams: int = 1 temperature: float = 1.0 - top_k: int = 50 + top_k: int = 10 top_p: float = 1 typical_p: float = 1 repetition_penalty: float = 1 From 40da81b44c95858430614b5e5ad23432bd2ff72c Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 21 Mar 2024 18:11:45 -0400 Subject: [PATCH 02/10] fix: update supported models --- presets/models/supported_models.yaml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml index 0f68002c5..262be71b9 100644 --- a/presets/models/supported_models.yaml +++ b/presets/models/supported_models.yaml @@ -25,6 +25,7 @@ models: runtime: llama-2 tag: 0.0.3 # Tag history: + # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Inference API Cleanup (#233) # 0.0.2 - Eliminate Unnecessary Process Group Creation in Worker Initialization (#244) # 0.0.1 - Initial Release @@ -34,23 +35,24 @@ models: type: text-generation version: https://huggingface.co/tiiuae/falcon-7b/commit/898df1396f35e447d5fe44e0a3ccaaaa69f30d36 runtime: tfs - tag: 0.0.3 + tag: 0.0.4 - name: falcon-7b-instruct type: text-generation version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99 runtime: tfs - tag: 0.0.3 + tag: 0.0.4 - name: falcon-40b type: text-generation version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146 runtime: tfs - tag: 0.0.3 + tag: 0.0.4 - name: falcon-40b-instruct type: text-generation version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f runtime: tfs - tag: 0.0.3 + tag: 0.0.4 # Tag history: + # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Update Default Params (#294) # 0.0.2 - Inference API Cleanup (#233) # 0.0.1 - Initial Release @@ -60,13 +62,14 @@ models: type: text-generation version: https://huggingface.co/mistralai/Mistral-7B-v0.1/commit/26bca36bde8333b5d7f72e9ed20ccda6a618af24 runtime: tfs - tag: 0.0.3 + tag: 0.0.4 - name: mistral-7b-instruct type: text-generation version: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/commit/b70aa86578567ba3301b21c8a27bea4e8f6d6d61 runtime: tfs - tag: 0.0.3 + tag: 0.0.4 # Tag history: + # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Update Default Params (#294) # 0.0.2 - Inference API Cleanup (#233) # 0.0.1 - Initial Release @@ -76,7 +79,8 @@ models: type: text-generation version: https://huggingface.co/microsoft/phi-2/commit/b10c3eba545ad279e7208ee3a5d644566f001670 runtime: tfs - tag: 0.0.2 + tag: 0.0.3 # Tag history: + # 0.0.3 - Adjust default model params (#310) # 0.0.2 - Update Default Params (#294) # 0.0.1 - Initial Release From d4230134ba5c4ac0e37731006ed3711ec4dacbd8 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 21 Mar 2024 18:12:07 -0400 Subject: [PATCH 03/10] nit --- presets/models/supported_models.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml index 262be71b9..1bd396133 100644 --- a/presets/models/supported_models.yaml +++ b/presets/models/supported_models.yaml @@ -25,7 +25,6 @@ models: runtime: llama-2 tag: 0.0.3 # Tag history: - # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Inference API Cleanup (#233) # 0.0.2 - Eliminate Unnecessary Process Group Creation in Worker Initialization (#244) # 0.0.1 - Initial Release From cab254ed7df9803ff3be257c3c7ea7f3a3398747 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 21 Mar 2024 18:20:34 -0400 Subject: [PATCH 04/10] nit --- .github/workflows/kind-cluster/determine_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/kind-cluster/determine_models.py b/.github/workflows/kind-cluster/determine_models.py index 402365441..5ace3ba63 100644 --- a/.github/workflows/kind-cluster/determine_models.py +++ b/.github/workflows/kind-cluster/determine_models.py @@ -117,10 +117,10 @@ def check_modified_models(pr_branch): def main(): pr_branch = os.environ.get("PR_BRANCH", "main") # If not specified default to 'main' - force_run_all = os.environ.get("FORCE_RUN_ALL", False) # If not specified default to False + force_run_all = os.environ.get("FORCE_RUN_ALL", "false") # If not specified default to False affected_models = [] - if force_run_all: + if force_run_all != "false": affected_models = [model['name'] for model in YAML_PR['models']] else: # Logic to determine affected models From 26a1d6befb9eec4f9f887e84fc55a7cedc693ff4 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 21 Mar 2024 18:34:06 -0400 Subject: [PATCH 05/10] fix: update UT param --- presets/inference/text-generation/tests/test_inference_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/presets/inference/text-generation/tests/test_inference_api.py b/presets/inference/text-generation/tests/test_inference_api.py index d6506b08b..c15b0f38f 100644 --- a/presets/inference/text-generation/tests/test_inference_api.py +++ b/presets/inference/text-generation/tests/test_inference_api.py @@ -156,9 +156,9 @@ def test_default_generation_params(configured_app): _, kwargs = mock_pipeline.call_args assert kwargs['max_length'] == 200 assert kwargs['min_length'] == 0 - assert kwargs['do_sample'] is False + assert kwargs['do_sample'] is True assert kwargs['temperature'] == 1.0 - assert kwargs['top_k'] == 50 + assert kwargs['top_k'] == 10 assert kwargs['top_p'] == 1 assert kwargs['typical_p'] == 1 assert kwargs['repetition_penalty'] == 1 From 1957fa7499c9be1bcd36f8260c88164f483e52c9 Mon Sep 17 00:00:00 2001 From: Ishaan Sehgal Date: Fri, 22 Mar 2024 19:51:04 -0400 Subject: [PATCH 06/10] Update supported_models.yaml --- presets/models/supported_models.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml index 1bd396133..14079be47 100644 --- a/presets/models/supported_models.yaml +++ b/presets/models/supported_models.yaml @@ -44,12 +44,12 @@ models: type: text-generation version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146 runtime: tfs - tag: 0.0.4 + tag: 0.0.5 - name: falcon-40b-instruct type: text-generation version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f runtime: tfs - tag: 0.0.4 + tag: 0.0.5 # Tag history: # 0.0.4 - Adjust default model params (#310) # 0.0.3 - Update Default Params (#294) From 09cd248f389f0639490233101c47ff2ad4b1eb67 Mon Sep 17 00:00:00 2001 From: Ishaan Sehgal Date: Sat, 23 Mar 2024 19:17:58 -0400 Subject: [PATCH 07/10] Update e2e-preset-test.yml Signed-off-by: Ishaan Sehgal --- .github/workflows/e2e-preset-test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml index 06dd5ac3b..facc7f134 100644 --- a/.github/workflows/e2e-preset-test.yml +++ b/.github/workflows/e2e-preset-test.yml @@ -264,16 +264,19 @@ jobs: if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') run: | kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s + sleep 30 - name: Test home endpoint if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') run: | curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/ + sleep 30 - name: Test healthz endpoint if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') run: | curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz + sleep 30 - name: Test inference endpoint if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') From c2c200d579015433af1701c0ee31cf7fa8ee214b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 25 Mar 2024 10:28:36 -0400 Subject: [PATCH 08/10] revert sleep --- .github/workflows/e2e-preset-test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml index facc7f134..06dd5ac3b 100644 --- a/.github/workflows/e2e-preset-test.yml +++ b/.github/workflows/e2e-preset-test.yml @@ -264,19 +264,16 @@ jobs: if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') run: | kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s - sleep 30 - name: Test home endpoint if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') run: | curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/ - sleep 30 - name: Test healthz endpoint if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') run: | curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz - sleep 30 - name: Test inference endpoint if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true') From 30591d1236dbe62e163237e85cc33eb481e6df0a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Mon, 25 Mar 2024 10:39:36 -0400 Subject: [PATCH 09/10] note: 0.0.5 --- presets/models/supported_models.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml index 14079be47..27e901ba4 100644 --- a/presets/models/supported_models.yaml +++ b/presets/models/supported_models.yaml @@ -40,6 +40,7 @@ models: version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99 runtime: tfs tag: 0.0.4 + # Note: 0.0.5 - Corrected image version after incomplete upload issue with 0.0.4 - name: falcon-40b type: text-generation version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146 From 3a4325365291afb38a8d646c4e7607124bb29065 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 26 Mar 2024 09:51:00 -0700 Subject: [PATCH 10/10] note: 0.0.5 --- presets/models/supported_models.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml index 27e901ba4..0441a945a 100644 --- a/presets/models/supported_models.yaml +++ b/presets/models/supported_models.yaml @@ -40,7 +40,11 @@ models: version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99 runtime: tfs tag: 0.0.4 - # Note: 0.0.5 - Corrected image version after incomplete upload issue with 0.0.4 + # Tag history: + # 0.0.4 - Adjust default model params (#310) + # 0.0.3 - Update Default Params (#294) + # 0.0.2 - Inference API Cleanup (#233) + # 0.0.1 - Initial Release - name: falcon-40b type: text-generation version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146 @@ -51,8 +55,9 @@ models: version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f runtime: tfs tag: 0.0.5 - # Tag history: - # 0.0.4 - Adjust default model params (#310) + # Tag history for 40b models: + # 0.0.5 - Adjust default model params (#310) + # 0.0.4 - Skipped due to incomplete upload issue # 0.0.3 - Update Default Params (#294) # 0.0.2 - Inference API Cleanup (#233) # 0.0.1 - Initial Release