Merge branch 'main' into feature/PRD-700-replacing-zenml-login-references
zenml-io · Dec 12, 2024 · 0941de4 · 0941de4
2 parents f393f96 + 300dad0
commit 0941de4
Show file tree

Hide file tree

Showing 24 changed files with 661 additions and 63 deletions.
diff --git a/.github/workflows/production_run_complete_llm.yml b/.github/workflows/production_run_complete_llm.yml
@@ -47,7 +47,6 @@ jobs:
         working-directory: ./llm-complete-guide
         run: |
           zenml init
-          zenml connect --url $ZENML_STORE_URL --api-key $ZENML_STORE_API_KEY
 
       - name: Set stack (Production)
         working-directory: ./llm-complete-guide

diff --git a/.github/workflows/staging_run_complete_llm.yml b/.github/workflows/staging_run_complete_llm.yml
@@ -43,7 +43,6 @@ jobs:
         working-directory: ./llm-complete-guide
         run: |
           zenml init
-          zenml connect --url $ZENML_STORE_URL --api-key $ZENML_STORE_API_KEY
 
       - name: Set stack (Staging)
         working-directory: ./llm-complete-guide

diff --git a/llm-complete-guide/ZENML_VERSION.txt b/llm-complete-guide/ZENML_VERSION.txt
@@ -1 +1 @@
-v0.68.1
+0.71.0
diff --git a/llm-complete-guide/configs/dev/rag.yaml b/llm-complete-guide/configs/dev/rag.yaml
@@ -17,6 +17,7 @@ settings:
       - pygithub
       - rerankers[flashrank]
       - matplotlib
+      - elasticsearch
 
     environment:
       ZENML_PROJECT_SECRET_NAME: llm_complete

diff --git a/llm-complete-guide/configs/dev/rag_eval.yaml b/llm-complete-guide/configs/dev/rag_eval.yaml
@@ -13,4 +13,5 @@ settings:
       - psycopg2-binary
       - tiktoken
       - pygithub
+      - elasticsearch
     python_package_installer: "uv"
diff --git a/llm-complete-guide/configs/production/eval.yaml b/llm-complete-guide/configs/production/eval.yaml
@@ -17,6 +17,7 @@ settings:
       - matplotlib
       - pillow
       - pygithub
+      - elasticsearch
     environment:
       ZENML_PROJECT_SECRET_NAME: llm_complete
       ZENML_ENABLE_RICH_TRACEBACK: FALSE

diff --git a/llm-complete-guide/configs/production/rag.yaml b/llm-complete-guide/configs/production/rag.yaml
@@ -17,6 +17,8 @@ settings:
       - pygithub
       - rerankers[flashrank]
       - matplotlib
+      - elasticsearch
+
     environment:
       ZENML_PROJECT_SECRET_NAME: llm_complete
       ZENML_ENABLE_RICH_TRACEBACK: FALSE

diff --git a/llm-complete-guide/configs/staging/eval.yaml b/llm-complete-guide/configs/staging/eval.yaml
@@ -17,6 +17,7 @@ settings:
       - matplotlib
       - pillow
       - pygithub
+      - elasticsearch
     environment:
       ZENML_PROJECT_SECRET_NAME: llm_complete
       ZENML_ENABLE_RICH_TRACEBACK: FALSE

diff --git a/llm-complete-guide/configs/staging/rag.yaml b/llm-complete-guide/configs/staging/rag.yaml
@@ -17,6 +17,7 @@ settings:
       - pygithub
       - rerankers[flashrank]
       - matplotlib
+      - elasticsearch
 
     environment:
       ZENML_PROJECT_SECRET_NAME: llm_complete

diff --git a/llm-complete-guide/constants.py b/llm-complete-guide/constants.py
@@ -23,6 +23,9 @@
     384  # Update this to match the dimensionality of the new model
 )
 
+# ZenML constants
+ZENML_CHATBOT_MODEL = "zenml-docs-qa-chatbot"
+
 # Scraping constants
 RATE_LIMIT = 5  # Maximum number of requests per second
 
@@ -78,3 +81,4 @@
 USE_ARGILLA_ANNOTATIONS = False
 
 SECRET_NAME = os.getenv("ZENML_PROJECT_SECRET_NAME", "llm-complete")
+SECRET_NAME_ELASTICSEARCH = "elasticsearch-zenml"
diff --git a/llm-complete-guide/requirements-argilla.txt b/llm-complete-guide/requirements-argilla.txt
@@ -1,4 +1,4 @@
-zenml[server]>=0.68.1
+zenml[server]
 sentence-transformers>=3,<=3.0.1
 transformers<=4.44.0
 litellm

diff --git a/llm-complete-guide/requirements.txt b/llm-complete-guide/requirements.txt
@@ -1,4 +1,4 @@
-zenml[server]>=0.68.1
+zenml[server]
 ratelimit
 pgvector
 psycopg2-binary
@@ -20,6 +20,7 @@ datasets
 torch
 gradio
 huggingface-hub
+elasticsearch
 
 # optional requirements for S3 artifact store
 # s3fs>2022.3.0

diff --git a/llm-complete-guide/steps/eval_retrieval.py b/llm-complete-guide/steps/eval_retrieval.py
@@ -19,8 +19,10 @@
 
 from datasets import load_dataset
 from utils.llm_utils import (
+    find_vectorstore_name,
     get_db_conn,
     get_embeddings,
+    get_es_client,
     get_topn_similar_docs,
     rerank_documents,
 )
@@ -76,11 +78,23 @@ def query_similar_docs(
         Tuple containing the question, URL ending, and retrieved URLs.
     """
     embedded_question = get_embeddings(question)
-    db_conn = get_db_conn()
+    conn = None
+    es_client = None
+
+    vector_store_name = find_vectorstore_name()
+    if vector_store_name == "pgvector":
+        conn = get_db_conn()
+    else:
+        es_client = get_es_client()
+
     num_docs = 20 if use_reranking else returned_sample_size
     # get (content, url) tuples for the top n similar documents
     top_similar_docs = get_topn_similar_docs(
-        embedded_question, db_conn, n=num_docs, include_metadata=True
+        embedded_question, 
+        conn=conn, 
+        es_client=es_client, 
+        n=num_docs, 
+        include_metadata=True
     )
 
     if use_reranking: