add azure openai to groundedness (#369)
* add azure openai to groundedness

* undo accidental aggregation change

* example of using azure openai

* small change

* fix
joshreini1 authored Aug 10, 2023
1 parent ba74177 commit 9ddf355
Showing 2 changed files with 303 additions and 5 deletions.
297 changes: 297 additions & 0 deletions trulens_eval/examples/models/azure_openai_llama_index.ipynb
@@ -0,0 +1,297 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Azure OpenAI and Llama-Index\n",
"\n",
"In this quickstart you will create a simple Llama Index App and learn how to log it and get feedback on an LLM response using both an embedding and chat completion model from Azure OpenAI.\n",
"\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/models/azure_openai_llama_index.ipynb)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"### Install dependencies\n",
"Let's install the dependencies for this notebook if we don't already have them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#! pip install trulens-eval==0.8.0 llama_index==0.7.0 langchain==0.0.248 html2text==2020.1.16"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add API keys\n",
"For this quickstart, you will need a larger set of configuration values from Azure OpenAI than for typical OpenAI usage."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
"os.environ[\"OPENAI_API_BASE\"] = \"...\"\n",
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-05-15\"\n",
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import from TruLens"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Imports main tools:\n",
"from trulens_eval import TruLlama, Feedback, Tru, feedback\n",
"tru = Tru()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Simple LLM Application\n",
"\n",
"This example uses LlamaIndex, which internally uses an OpenAI LLM."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import openai\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from llama_index.llms import AzureOpenAI\n",
"from llama_index import LangchainEmbedding\n",
"from llama_index import VectorStoreIndex, SimpleWebPageReader, ServiceContext\n",
"from llama_index import set_global_service_context\n",
"import logging\n",
"import sys\n",
"\n",
"logging.basicConfig(\n",
" stream=sys.stdout, level=logging.INFO\n",
") # logging.DEBUG for more verbose output\n",
"logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
"\n",
"# get model from Azure\n",
"llm = AzureOpenAI(engine=\"...\", model=\"text-davinci-003\") \n",
"\n",
"# You need to deploy your own embedding model as well as your own chat completion model\n",
"embedding_llm = LangchainEmbedding(\n",
" OpenAIEmbeddings(\n",
" model=\"text-embedding-ada-002\",\n",
" deployment=\"...\",\n",
" openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n",
" openai_api_base=os.environ[\"OPENAI_API_BASE\"],\n",
" openai_api_type=os.environ[\"OPENAI_API_TYPE\"],\n",
" openai_api_version=os.environ[\"OPENAI_API_VERSION\"]\n",
" ),\n",
" embed_batch_size=1\n",
")\n",
"\n",
"documents = SimpleWebPageReader(html_to_text=True).load_data(\n",
" [\"http://paulgraham.com/worked.html\"]\n",
")\n",
"\n",
"service_context = ServiceContext.from_defaults(\n",
" llm=llm,\n",
" embed_model=embedding_llm\n",
")\n",
"\n",
"set_global_service_context(service_context)\n",
"\n",
"index = VectorStoreIndex.from_documents(documents)\n",
"\n",
"query_engine = index.as_query_engine()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Send your first request"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"What is most interesting about this essay?\"\n",
"answer = query_engine.query(query)\n",
"\n",
"print(answer.get_formatted_sources())\n",
"print(\"query was:\", query)\n",
"print(\"answer was:\", answer)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Feedback Function(s)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"# Initialize AzureOpenAI-based feedback function collection class:\n",
"azopenai = feedback.AzureOpenAI(model_engine=\"gpt-35-turbo\", deployment_id=\"...\")\n",
"\n",
"# Question/answer relevance between overall question and answer.\n",
"f_qa_relevance = Feedback(azopenai.relevance).on_input_output()\n",
"\n",
"# Question/statement relevance between question and each context chunk.\n",
"f_qs_relevance = Feedback(azopenai.qs_relevance).on_input().on(\n",
" TruLlama.select_source_nodes().node.text\n",
").aggregate(np.mean)\n",
"\n",
"# groundedness of output on the context\n",
"groundedness = feedback.Groundedness(summarize_provider=azopenai, groundedness_provider=azopenai)\n",
"f_groundedness = Feedback(groundedness.groundedness_measure).on(TruLlama.select_source_nodes().node.text).on_output()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instrument chain for logging with TruLens"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tru_query_engine = TruLlama(query_engine,\n",
" app_id='LlamaIndex_App1',\n",
" feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"What is most interesting about this essay?\"\n",
"answer = tru_query_engine.query(query)\n",
"\n",
"print(answer.get_formatted_sources())\n",
"print(\"query was:\", query)\n",
"print(\"answer was:\", answer)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore in a Dashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tru.run_dashboard() # open a local streamlit app to explore\n",
"\n",
"# tru.stop_dashboard() # stop if needed"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Or view results directly in your notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.11.4 ('agents')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"vscode": {
"interpreter": {
"hash": "7d153714b979d5e6d08dd8ec90712dd93bff2c9b6c1f0c118169738af3430cd4"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
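The notebook above drives Azure OpenAI entirely through four environment variables. As a quick sanity check before running it, the variables can be validated up front; this helper is illustrative only (it is not part of `trulens_eval`) and assumes exactly the four variable names the notebook sets:

```python
# The Azure OpenAI settings the notebook above relies on. The variable names
# come from the notebook; this validation helper is illustrative, not part
# of trulens_eval.
REQUIRED_AZURE_VARS = [
    "OPENAI_API_KEY",      # key for your Azure OpenAI resource
    "OPENAI_API_BASE",     # e.g. https://<resource-name>.openai.azure.com/
    "OPENAI_API_VERSION",  # e.g. 2023-05-15
    "OPENAI_API_TYPE",     # must be "azure" for Azure endpoints
]

def missing_azure_settings(env: dict) -> list:
    """Return the names of any required Azure OpenAI settings absent from env."""
    return [name for name in REQUIRED_AZURE_VARS if not env.get(name)]

# Example: only the key is set, so the other three are reported missing.
env = {"OPENAI_API_KEY": "sk-..."}
print(missing_azure_settings(env))
# → ['OPENAI_API_BASE', 'OPENAI_API_VERSION', 'OPENAI_API_TYPE']
```

In a real session you would pass `os.environ` rather than a hand-built dict.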
11 changes: 6 additions & 5 deletions trulens_eval/trulens_eval/feedback.py
@@ -1827,20 +1827,21 @@ class Groundedness(SerialModel, WithClassInfo):
     summarize_provider: Provider
     groundedness_provider: Provider

-    def __init__(self, groundedness_provider: Provider = None):
+    def __init__(self, summarize_provider: Provider = None, groundedness_provider: Provider = None):
         """Instantiates the groundedness providers. Currently the groundedness functions work well with a summarizer.
         This class will use an OpenAI summarizer to find the relevant strings in a text. The groundedness_provider can
         either be an llm with OpenAI or NLI with huggingface.
         Args:
             groundedness_provider (Provider, optional): groundedness provider options: OpenAI LLM or HuggingFace NLI. Defaults to OpenAI().
         """
+        if summarize_provider is None:
+            summarize_provider = OpenAI()
         if groundedness_provider is None:
             groundedness_provider = OpenAI()
-        summarize_provider = OpenAI()
-        if not isinstance(groundedness_provider, (OpenAI, Huggingface)):
+        if not isinstance(groundedness_provider, (OpenAI, AzureOpenAI, Huggingface)):
             raise Exception(
-                "Groundedness is only supported groundedness_provider as OpenAI or Huggingface Providers."
+                "Groundedness is only supported groundedness_provider as OpenAI, AzureOpenAI or Huggingface Providers."
             )
         super().__init__(
             summarize_provider=summarize_provider,
@@ -1868,7 +1869,7 @@ def groundedness_measure(self, source: str, statement: str) -> float:
             float: A measure between 0 and 1, where 1 means each sentence is grounded in the source.
         """
         groundedness_scores = {}
-        if isinstance(self.groundedness_provider, OpenAI):
+        if isinstance(self.groundedness_provider, (AzureOpenAI, OpenAI)):
             plausible_junk_char_min = 4  # very likely "sentences" under 4 characters are punctuation, spaces, etc
             if len(statement) > plausible_junk_char_min:
                 reason = self.summarize_provider._groundedness_doc_in_out(
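The core of this change widens the provider `isinstance` check so `AzureOpenAI` is accepted as a groundedness provider. The pattern can be sketched standalone; the classes below are empty stand-ins for illustration, not the real `trulens_eval` providers:

```python
# Standalone sketch of the validation pattern this commit changes: the
# accepted provider types are widened from (OpenAI, Huggingface) to also
# include AzureOpenAI. These classes are empty stand-ins, not the real
# trulens_eval providers.
class Provider: ...
class OpenAI(Provider): ...
class AzureOpenAI(OpenAI): ...
class Huggingface(Provider): ...

def validate_groundedness_provider(provider: Provider) -> Provider:
    """Raise unless the provider is one of the supported types."""
    if not isinstance(provider, (OpenAI, AzureOpenAI, Huggingface)):
        raise TypeError(
            "Groundedness only supports OpenAI, AzureOpenAI or "
            "Huggingface providers."
        )
    return provider

validate_groundedness_provider(AzureOpenAI())  # accepted after this commit
```

Note that if `AzureOpenAI` subclasses `OpenAI` (as in this sketch), the widened tuple is belt-and-braces; listing it explicitly still documents the intent and keeps the check correct should the hierarchy change.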
