From 37b5beac964ad7f4abf66e2be6616075b56e58ae Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Thu, 21 Mar 2024 18:08:01 -0400
Subject: [PATCH 01/10] fix: Adjusted default model params

---
 presets/inference/text-generation/inference_api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/presets/inference/text-generation/inference_api.py b/presets/inference/text-generation/inference_api.py
index bf739844d..f6c604a54 100644
--- a/presets/inference/text-generation/inference_api.py
+++ b/presets/inference/text-generation/inference_api.py
@@ -125,11 +125,11 @@ def health_check():
 class GenerateKwargs(BaseModel):
     max_length: int = 200 # Length of input prompt+max_new_tokens
     min_length: int = 0
-    do_sample: bool = False
+    do_sample: bool = True
     early_stopping: bool = False
     num_beams: int = 1
     temperature: float = 1.0
-    top_k: int = 50
+    top_k: int = 10
     top_p: float = 1
     typical_p: float = 1
     repetition_penalty: float = 1

From 40da81b44c95858430614b5e5ad23432bd2ff72c Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Thu, 21 Mar 2024 18:11:45 -0400
Subject: [PATCH 02/10] fix: update supported models

---
 presets/models/supported_models.yaml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml
index 0f68002c5..262be71b9 100644
--- a/presets/models/supported_models.yaml
+++ b/presets/models/supported_models.yaml
@@ -25,6 +25,7 @@ models:
     runtime: llama-2
     tag: 0.0.3
     # Tag history:
+    # 0.0.4 - Adjust default model params (#310)
     # 0.0.3 - Inference API Cleanup (#233)
     # 0.0.2 - Eliminate Unnecessary Process Group Creation in Worker Initialization (#244)
     # 0.0.1 - Initial Release
@@ -34,23 +35,24 @@ models:
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-7b/commit/898df1396f35e447d5fe44e0a3ccaaaa69f30d36
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
   - name: falcon-7b-instruct
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
   - name: falcon-40b
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
   - name: falcon-40b-instruct
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
     # Tag history:
+    # 0.0.4 - Adjust default model params (#310)
     # 0.0.3 - Update Default Params (#294)
     # 0.0.2 - Inference API Cleanup (#233)
     # 0.0.1 - Initial Release
@@ -60,13 +62,14 @@ models:
     type: text-generation 
     version: https://huggingface.co/mistralai/Mistral-7B-v0.1/commit/26bca36bde8333b5d7f72e9ed20ccda6a618af24
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
   - name: mistral-7b-instruct
     type: text-generation
     version: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/commit/b70aa86578567ba3301b21c8a27bea4e8f6d6d61
     runtime: tfs
-    tag: 0.0.3
+    tag: 0.0.4
     # Tag history:
+    # 0.0.4 - Adjust default model params (#310)
     # 0.0.3 - Update Default Params (#294)
     # 0.0.2 - Inference API Cleanup (#233)
     # 0.0.1 - Initial Release
@@ -76,7 +79,8 @@ models:
     type: text-generation 
     version: https://huggingface.co/microsoft/phi-2/commit/b10c3eba545ad279e7208ee3a5d644566f001670
     runtime: tfs
-    tag: 0.0.2
+    tag: 0.0.3
     # Tag history:
+    # 0.0.3 - Adjust default model params (#310)
     # 0.0.2 - Update Default Params (#294)
     # 0.0.1 - Initial Release

From d4230134ba5c4ac0e37731006ed3711ec4dacbd8 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Thu, 21 Mar 2024 18:12:07 -0400
Subject: [PATCH 03/10] nit: drop 0.0.4 tag-history entry from llama-2 (tag stays 0.0.3)

---
 presets/models/supported_models.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml
index 262be71b9..1bd396133 100644
--- a/presets/models/supported_models.yaml
+++ b/presets/models/supported_models.yaml
@@ -25,7 +25,6 @@ models:
     runtime: llama-2
     tag: 0.0.3
     # Tag history:
-    # 0.0.4 - Adjust default model params (#310)
     # 0.0.3 - Inference API Cleanup (#233)
     # 0.0.2 - Eliminate Unnecessary Process Group Creation in Worker Initialization (#244)
     # 0.0.1 - Initial Release

From cab254ed7df9803ff3be257c3c7ea7f3a3398747 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Thu, 21 Mar 2024 18:20:34 -0400
Subject: [PATCH 04/10] fix: default FORCE_RUN_ALL to the string "false" and compare as a string

---
 .github/workflows/kind-cluster/determine_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/kind-cluster/determine_models.py b/.github/workflows/kind-cluster/determine_models.py
index 402365441..5ace3ba63 100644
--- a/.github/workflows/kind-cluster/determine_models.py
+++ b/.github/workflows/kind-cluster/determine_models.py
@@ -117,10 +117,10 @@ def check_modified_models(pr_branch):
 
 def main():
     pr_branch = os.environ.get("PR_BRANCH", "main") # If not specified default to 'main'
-    force_run_all = os.environ.get("FORCE_RUN_ALL", False) # If not specified default to False
+    force_run_all = os.environ.get("FORCE_RUN_ALL", "false") # If not specified default to the string "false" (env values are always strings)
 
     affected_models = []
-    if force_run_all:
+    if force_run_all != "false":
         affected_models = [model['name'] for model in YAML_PR['models']]
     else:
         # Logic to determine affected models

From 26a1d6befb9eec4f9f887e84fc55a7cedc693ff4 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Thu, 21 Mar 2024 18:34:06 -0400
Subject: [PATCH 05/10] fix: update UT param

---
 presets/inference/text-generation/tests/test_inference_api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/presets/inference/text-generation/tests/test_inference_api.py b/presets/inference/text-generation/tests/test_inference_api.py
index d6506b08b..c15b0f38f 100644
--- a/presets/inference/text-generation/tests/test_inference_api.py
+++ b/presets/inference/text-generation/tests/test_inference_api.py
@@ -156,9 +156,9 @@ def test_default_generation_params(configured_app):
         _, kwargs = mock_pipeline.call_args
         assert kwargs['max_length'] == 200
         assert kwargs['min_length'] == 0
-        assert kwargs['do_sample'] is False
+        assert kwargs['do_sample'] is True
         assert kwargs['temperature'] == 1.0
-        assert kwargs['top_k'] == 50
+        assert kwargs['top_k'] == 10
         assert kwargs['top_p'] == 1
         assert kwargs['typical_p'] == 1
         assert kwargs['repetition_penalty'] == 1

From 1957fa7499c9be1bcd36f8260c88164f483e52c9 Mon Sep 17 00:00:00 2001
From: Ishaan Sehgal <ishaanforthewin@gmail.com>
Date: Fri, 22 Mar 2024 19:51:04 -0400
Subject: [PATCH 06/10] Update supported_models.yaml

---
 presets/models/supported_models.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml
index 1bd396133..14079be47 100644
--- a/presets/models/supported_models.yaml
+++ b/presets/models/supported_models.yaml
@@ -44,12 +44,12 @@ models:
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146
     runtime: tfs
-    tag: 0.0.4
+    tag: 0.0.5
   - name: falcon-40b-instruct
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f
     runtime: tfs
-    tag: 0.0.4
+    tag: 0.0.5
     # Tag history:
     # 0.0.4 - Adjust default model params (#310)
     # 0.0.3 - Update Default Params (#294)

From 09cd248f389f0639490233101c47ff2ad4b1eb67 Mon Sep 17 00:00:00 2001
From: Ishaan Sehgal <ishaanforthewin@gmail.com>
Date: Sat, 23 Mar 2024 19:17:58 -0400
Subject: [PATCH 07/10] Update e2e-preset-test.yml

Signed-off-by: Ishaan Sehgal <ishaanforthewin@gmail.com>
---
 .github/workflows/e2e-preset-test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
index 06dd5ac3b..facc7f134 100644
--- a/.github/workflows/e2e-preset-test.yml
+++ b/.github/workflows/e2e-preset-test.yml
@@ -264,16 +264,19 @@ jobs:
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
             kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s
+            sleep 30
         
       - name: Test home endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
             curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/
+            sleep 30
 
       - name: Test healthz endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
             curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz
+            sleep 30
     
       - name: Test inference endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')

From c2c200d579015433af1701c0ee31cf7fa8ee214b Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Mon, 25 Mar 2024 10:28:36 -0400
Subject: [PATCH 08/10] revert sleep

---
 .github/workflows/e2e-preset-test.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/e2e-preset-test.yml b/.github/workflows/e2e-preset-test.yml
index facc7f134..06dd5ac3b 100644
--- a/.github/workflows/e2e-preset-test.yml
+++ b/.github/workflows/e2e-preset-test.yml
@@ -264,19 +264,16 @@ jobs:
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
             kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{ matrix.model.name }} --timeout=1800s
-            sleep 30
         
       - name: Test home endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
             curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/
-            sleep 30
 
       - name: Test healthz endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')
         run: |
             curl http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/healthz
-            sleep 30
     
       - name: Test inference endpoint
         if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && (steps.check_prod_image.outputs.IMAGE_EXISTS == 'false' || env.FORCE_RUN_ALL == 'true')

From 30591d1236dbe62e163237e85cc33eb481e6df0a Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Mon, 25 Mar 2024 10:39:36 -0400
Subject: [PATCH 09/10] note: 0.0.5

---
 presets/models/supported_models.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml
index 14079be47..27e901ba4 100644
--- a/presets/models/supported_models.yaml
+++ b/presets/models/supported_models.yaml
@@ -40,6 +40,7 @@ models:
     version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99
     runtime: tfs
     tag: 0.0.4
+  # Note: 0.0.5 - Corrected image version after incomplete upload issue with 0.0.4
   - name: falcon-40b
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146

From 3a4325365291afb38a8d646c4e7607124bb29065 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Tue, 26 Mar 2024 09:51:00 -0700
Subject: [PATCH 10/10] note: record 0.0.5 in falcon-40b tag history (0.0.4 skipped) and add falcon-7b tag history

---
 presets/models/supported_models.yaml | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/presets/models/supported_models.yaml b/presets/models/supported_models.yaml
index 27e901ba4..0441a945a 100644
--- a/presets/models/supported_models.yaml
+++ b/presets/models/supported_models.yaml
@@ -40,7 +40,11 @@ models:
     version: https://huggingface.co/tiiuae/falcon-7b-instruct/commit/cf4b3c42ce2fdfe24f753f0f0d179202fea59c99
     runtime: tfs
     tag: 0.0.4
-  # Note: 0.0.5 - Corrected image version after incomplete upload issue with 0.0.4
+    # Tag history:
+    # 0.0.4 - Adjust default model params (#310)
+    # 0.0.3 - Update Default Params (#294)
+    # 0.0.2 - Inference API Cleanup (#233)
+    # 0.0.1 - Initial Release
   - name: falcon-40b
     type: text-generation
     version: https://huggingface.co/tiiuae/falcon-40b/commit/4a70170c215b36a3cce4b4253f6d0612bb7d4146
@@ -51,8 +55,9 @@ models:
     version: https://huggingface.co/tiiuae/falcon-40b-instruct/commit/ecb78d97ac356d098e79f0db222c9ce7c5d9ee5f
     runtime: tfs
     tag: 0.0.5
-    # Tag history:
-    # 0.0.4 - Adjust default model params (#310)
+    # Tag history for 40b models:
+    # 0.0.5 - Adjust default model params (#310)
+    # 0.0.4 - Skipped due to incomplete upload issue
     # 0.0.3 - Update Default Params (#294)
     # 0.0.2 - Inference API Cleanup (#233)
     # 0.0.1 - Initial Release