From 78bb89ccfb8d9f0b61e13e5d84c94295b51e8175 Mon Sep 17 00:00:00 2001
From: rick
Date: Thu, 28 Sep 2023 21:58:19 +0000
Subject: [PATCH] fix colabs

---
 .../workflows/combine_nb_to_docs_testing.sh  |   2 +
 README.md                                    |   8 +-
 docs/trulens_eval/gh_top_intro.md            |   8 +-
 docs/trulens_eval/intro.md                   |   6 +-
 trulens_eval/README.md                       |   6 +-
 .../colab/langchain_quickstart_colab.ipynb   | 311 ++++++++++++++++++
 .../colab/llama_index_quickstart_colab.ipynb | 306 +++++++++++++++++
 .../colab/text2text_quickstart_colab.ipynb   | 272 +++++++++++++++
 8 files changed, 905 insertions(+), 14 deletions(-)
 create mode 100644 trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb
 create mode 100644 trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb
 create mode 100644 trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb

diff --git a/.github/workflows/combine_nb_to_docs_testing.sh b/.github/workflows/combine_nb_to_docs_testing.sh
index 5b8873932..da92ee99f 100755
--- a/.github/workflows/combine_nb_to_docs_testing.sh
+++ b/.github/workflows/combine_nb_to_docs_testing.sh
@@ -54,6 +54,8 @@ mv TOP_README.md ../../README.md
 # Links are referenced in intro.md and gh_intro.md
 # There are symlinks from ../../trulens_eval/generated_files/ to these scripts for testing
+mkdir -p ../../trulens_eval/examples/quickstart/colab/
+mv *quickstart_colab.ipynb ../../trulens_eval/examples/quickstart/colab/
 mkdir -p ../../trulens_eval/examples/quickstart/py_script_quickstarts/
 mv ./py_script_quickstarts/*.py ../../trulens_eval/examples/quickstart/py_script_quickstarts/

diff --git a/README.md b/README.md
index 8396907b6..e5fdb7774 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/trulens_eval)
 [![Slack](https://img.shields.io/badge/slack-join-green?logo=slack)](https://communityinviter.com/apps/aiqualityforum/josh)
 [![Docs](https://img.shields.io/badge/docs-trulens.org-blue)](https://www.trulens.org/welcome/)
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.10.0/trulens_eval/examples/colab/quickstarts/langchain_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.10.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)

 # 🦑 **Welcome to TruLens!**

@@ -44,21 +44,21 @@ TruLens supports the evaluation of tracking for any LLM app framework. Choose a
 **Langchain**

 [langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/langchain_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
 [langchain_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py).
 **Llama-Index**

 [llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/llama_index_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
 [llama_index_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py)

 **No Framework**

 [text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/text2text_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
 [text2text_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py)

diff --git a/docs/trulens_eval/gh_top_intro.md b/docs/trulens_eval/gh_top_intro.md
index eded2f3c1..2d949d429 100644
--- a/docs/trulens_eval/gh_top_intro.md
+++ b/docs/trulens_eval/gh_top_intro.md
@@ -4,7 +4,7 @@
 ![PyPI - Downloads](https://img.shields.io/pypi/dm/trulens_eval)
 [![Slack](https://img.shields.io/badge/slack-join-green?logo=slack)](https://communityinviter.com/apps/aiqualityforum/josh)
 [![Docs](https://img.shields.io/badge/docs-trulens.org-blue)](https://www.trulens.org/welcome/)
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.10.0/trulens_eval/examples/colab/quickstarts/langchain_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.10.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)

 # 🦑 **Welcome to TruLens!**

@@ -44,20 +44,20 @@ TruLens supports the evaluation of tracking for any LLM app framework. Choose a
 **Langchain**

 [langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/langchain_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
 [langchain_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py).

 **Llama-Index**

 [llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/llama_index_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
 [llama_index_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py)

 **No Framework**

 [text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/text2text_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
 [text2text_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py)

diff --git a/docs/trulens_eval/intro.md b/docs/trulens_eval/intro.md
index 1779ee2f0..d85fe8822 100644
--- a/docs/trulens_eval/intro.md
+++ b/docs/trulens_eval/intro.md
@@ -47,21 +47,21 @@ TruLens supports the evaluation of tracking for any LLM app framework. Choose a
 **Langchain**

 [langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/langchain_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
 [langchain_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py).
 **Llama-Index**

 [llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/llama_index_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
 [llama_index_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py)

 **No Framework**

 [text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/text2text_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
 [text2text_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py)

diff --git a/trulens_eval/README.md b/trulens_eval/README.md
index 1779ee2f0..d85fe8822 100644
--- a/trulens_eval/README.md
+++ b/trulens_eval/README.md
@@ -47,21 +47,21 @@ TruLens supports the evaluation of tracking for any LLM app framework. Choose a
 **Langchain**

 [langchain_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/langchain_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/langchain_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb)
 [langchain_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/langchain_quickstart.py).

 **Llama-Index**

 [llama_index_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/llama_index_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb)
 [llama_index_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/llama_index_quickstart.py)

 **No Framework**

 [text2text_quickstart.ipynb](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/text2text_quickstart.ipynb).
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/colab/quickstarts/text2text_quickstart_colab.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb)
 [text2text_quickstart.py](https://github.com/truera/trulens/blob/releases/rc-trulens-eval-0.14.0/trulens_eval/examples/quickstart/py_script_quickstarts/text2text_quickstart.py)

diff --git a/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb b/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb
new file mode 100644
index 000000000..504a33d13
--- /dev/null
+++ b/trulens_eval/examples/quickstart/colab/langchain_quickstart_colab.ipynb
@@ -0,0 +1,311 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -U trulens-eval\n",
+    "\n",
+    "# Google Colab Dependencies\n",
+    "!npm install localtunnel -q\n",
+    "# Quote the requirement so the shell does not treat '>=' as a redirect:\n",
+    "!pip install -q 'streamlit>=1.26.0'"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Langchain Quickstart\n",
+    "\n",
+    "In this quickstart, you will create a simple LLM chain and learn how to log it and get feedback on an LLM response.\n",
+    "\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/quickstart/langchain_quickstart.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ! pip install trulens_eval==0.14.0 'langchain>=0.0.263'"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "### Add API keys\n",
+    "For this quickstart, you will need OpenAI and Hugging Face keys."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
+    "os.environ[\"HUGGINGFACE_API_KEY\"] = \"...\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Import from LangChain and TruLens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import JSON\n",
+    "\n",
+    "# Imports main tools:\n",
+    "from trulens_eval import TruChain, Feedback, Huggingface, Tru\n",
+    "tru = Tru()\n",
+    "\n",
+    "# Imports from LangChain to build the app. You may need to install LangChain\n",
+    "# first, with the following:\n",
+    "# ! pip install 'langchain>=0.0.170'\n",
+    "from langchain.chains import LLMChain\n",
+    "from langchain.llms import OpenAI\n",
+    "from langchain.prompts.chat import ChatPromptTemplate, PromptTemplate\n",
+    "from langchain.prompts.chat import HumanMessagePromptTemplate"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create Simple LLM Application\n",
+    "\n",
+    "This example uses the LangChain framework and an OpenAI LLM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "full_prompt = HumanMessagePromptTemplate(\n",
+    "    prompt=PromptTemplate(\n",
+    "        template=\n",
+    "        \"Provide a helpful response with relevant background information for the following: {prompt}\",\n",
+    "        input_variables=[\"prompt\"],\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "chat_prompt_template = ChatPromptTemplate.from_messages([full_prompt])\n",
+    "\n",
+    "llm = OpenAI(temperature=0.9, max_tokens=128)\n",
+    "\n",
+    "chain = LLMChain(llm=llm, prompt=chat_prompt_template, verbose=True)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Send your first request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A Spanish prompt, useful later for the language-match feedback function.\n",
+    "prompt_input = '¿que hora es?'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm_response = chain(prompt_input)\n",
+    "\n",
+    "display(llm_response)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialize Feedback Function(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize Huggingface-based feedback function collection class:\n",
+    "hugs = Huggingface()\n",
+    "\n",
+    "# Define a language match feedback function using HuggingFace.\n",
+    "f_lang_match = Feedback(hugs.language_match).on_input_output()\n",
+    "# By default this will check language match on the main app input and main app\n",
+    "# output."
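+    ,"\n",
+    "\n",
+    "# A minimal sketch of the same feedback with explicit selectors, assuming\n",
+    "# trulens_eval's Select API (verify against your installed version):\n",
+    "# from trulens_eval import Select\n",
+    "# f_lang_match_explicit = Feedback(hugs.language_match).on(\n",
+    "#     Select.RecordInput).on(Select.RecordOutput)"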
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Instrument chain for logging with TruLens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru_recorder = TruChain(chain,\n",
+    "    app_id='Chain1_ChatApplication',\n",
+    "    feedbacks=[f_lang_match])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with tru_recorder as recording:\n",
+    "    llm_response = chain(prompt_input)\n",
+    "\n",
+    "display(llm_response)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Explore in a Dashboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru.run_dashboard() # open a local streamlit app to explore\n",
+    "\n",
+    "# tru.stop_dashboard() # stop if needed"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Chain Leaderboard\n",
+    "\n",
+    "Understand how your LLM application is performing at a glance. Once you've set up logging and evaluation in your application, you can view key performance statistics, including cost and average feedback value, across all of your LLM apps using the chain leaderboard. As you iterate new versions of your LLM application, you can compare their performance across all of the different quality metrics you've set up.\n",
+    "\n",
+    "Note: Average feedback values are returned and displayed in a range from 0 (worst) to 1 (best).\n",
+    "\n",
+    "![Chain Leaderboard](https://www.trulens.org/Assets/image/Leaderboard.png)\n",
+    "\n",
+    "To dive deeper into a particular chain, click \"Select Chain\".\n",
+    "\n",
+    "### Understand chain performance with Evaluations\n",
+    "\n",
+    "To learn more about the performance of a particular chain or LLM model, we can select it to view its evaluations at the record level. LLM quality is assessed through the use of feedback functions. Feedback functions are extensible methods for determining the quality of LLM responses, and they can be applied to any downstream LLM task. Out of the box, we provide a number of feedback functions for assessing model agreement, sentiment, relevance, and more.\n",
+    "\n",
+    "The evaluations tab provides record-level metadata and feedback on the quality of your LLM application.\n",
+    "\n",
+    "![Evaluations](https://www.trulens.org/Assets/image/Leaderboard.png)\n",
+    "\n",
+    "### Deep dive into full chain metadata\n",
+    "\n",
+    "Click on a record to dive deep into all of the details of your chain stack and underlying LLM, captured by `tru_recorder`.\n",
+    "\n",
+    "![Explore a Chain](https://www.trulens.org/Assets/image/Chain_Explore.png)\n",
+    "\n",
+    "If you prefer the raw format, you can quickly get it using the \"Display full chain json\" or \"Display full record json\" buttons at the bottom of the page."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: Feedback functions evaluated in a deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
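+    ,"\n",
+    "\n",
+    "A minimal sketch of deferred evaluation (assuming `FeedbackMode` from `trulens_eval.schema` and `Tru.start_evaluator`; verify against your installed version):\n",
+    "\n",
+    "```python\n",
+    "from trulens_eval.schema import FeedbackMode\n",
+    "\n",
+    "tru_recorder_deferred = TruChain(chain,\n",
+    "    app_id='Chain1_ChatApplication',\n",
+    "    feedbacks=[f_lang_match],\n",
+    "    feedback_mode=FeedbackMode.DEFERRED)  # queue feedback instead of running inline\n",
+    "\n",
+    "tru.start_evaluator()  # background worker that evaluates queued feedback\n",
+    "```"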
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Or view results directly in your notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "d5737f6101ac92451320b0e41890107145710b89f85909f3780d702e7818f973"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

diff --git a/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb b/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb
new file mode 100644
index 000000000..423fa37f8
--- /dev/null
+++ b/trulens_eval/examples/quickstart/colab/llama_index_quickstart_colab.ipynb
@@ -0,0 +1,306 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -U trulens-eval\n",
+    "\n",
+    "# Google Colab Dependencies\n",
+    "!npm install localtunnel -q\n",
+    "# Quote the requirement so the shell does not treat '>=' as a redirect:\n",
+    "!pip install -q 'streamlit>=1.26.0'"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Llama-Index Quickstart\n",
+    "\n",
+    "In this quickstart, you will create a simple Llama-Index app and learn how to log it and get feedback on an LLM response.\n",
+    "\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "\n",
+    "### Install dependencies\n",
+    "Let's install some of the dependencies for this notebook if we don't have them already."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#! pip install trulens-eval==0.14.0 'llama_index>=0.8.29post1' 'html2text>=2020.1.16'"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Add API keys\n",
+    "For this quickstart, you will need OpenAI and Hugging Face keys. The OpenAI key is used for embeddings and GPT, and the Hugging Face key is used for evaluation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
+    "os.environ[\"HUGGINGFACE_API_KEY\"] = \"...\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Import from LlamaIndex and TruLens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports main tools:\n",
+    "from trulens_eval import TruLlama, Feedback, Tru, feedback\n",
+    "tru = Tru()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create Simple LLM Application\n",
+    "\n",
+    "This example uses LlamaIndex, which internally uses an OpenAI LLM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index import VectorStoreIndex, SimpleWebPageReader\n",
+    "\n",
+    "documents = SimpleWebPageReader(\n",
+    "    html_to_text=True\n",
+    ").load_data([\"http://paulgraham.com/worked.html\"])\n",
+    "index = VectorStoreIndex.from_documents(documents)\n",
+    "\n",
+    "query_engine = index.as_query_engine()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Send your first request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = query_engine.query(\"What did the author do growing up?\")\n",
+    "print(response)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialize Feedback Function(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "# Initialize Huggingface-based feedback function collection class:\n",
+    "hugs = feedback.Huggingface()\n",
+    "openai = feedback.OpenAI()\n",
+    "\n",
+    "# Define a language match feedback function using HuggingFace.\n",
+    "f_lang_match = Feedback(hugs.language_match).on_input_output()\n",
+    "# By default this will check language match on the main app input and main app\n",
+    "# output.\n",
+    "\n",
+    "# Question/answer relevance between overall question and answer.\n",
+    "f_qa_relevance = Feedback(openai.relevance).on_input_output()\n",
+    "\n",
+    "# Question/statement relevance between question and each context chunk.\n",
+    "f_qs_relevance = Feedback(openai.qs_relevance).on_input().on(\n",
+    "    TruLlama.select_source_nodes().node.text\n",
+    ").aggregate(np.mean)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Instrument app for logging with TruLens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru_query_engine_recorder = TruLlama(query_engine,\n",
+    "    app_id='LlamaIndex_App1',\n",
+    "    feedbacks=[f_lang_match, f_qa_relevance, f_qs_relevance])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run the query inside the recording context manager so TruLens captures it.\n",
+    "with tru_query_engine_recorder as recording:\n",
+    "    query_engine.query(\"What did the author do growing up?\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Explore in a Dashboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru.run_dashboard() # open a local streamlit app to explore\n",
+    "\n",
+    "# tru.stop_dashboard() # stop if needed"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Leaderboard\n",
+    "\n",
+    "Understand how your LLM application is performing at a glance. Once you've set up logging and evaluation in your application, you can view key performance statistics, including cost and average feedback value, across all of your LLM apps using the app leaderboard. As you iterate new versions of your LLM application, you can compare their performance across all of the different quality metrics you've set up.\n",
+    "\n",
+    "Note: Average feedback values are returned and displayed in a range from 0 (worst) to 1 (best).\n",
+    "\n",
+    "![App Leaderboard](https://www.trulens.org/Assets/image/Leaderboard.png)\n",
+    "\n",
+    "To dive deeper into a particular app, click \"Select App\".\n",
+    "\n",
+    "### Understand app performance with Evaluations\n",
+    "\n",
+    "To learn more about the performance of a particular app or LLM model, we can select it to view its evaluations at the record level. LLM quality is assessed through the use of feedback functions. Feedback functions are extensible methods for determining the quality of LLM responses, and they can be applied to any downstream LLM task. Out of the box, we provide a number of feedback functions for assessing model agreement, sentiment, relevance, and more.\n",
+    "\n",
+    "The evaluations tab provides record-level metadata and feedback on the quality of your LLM application.\n",
+    "\n",
+    "![Evaluations](https://www.trulens.org/Assets/image/Leaderboard.png)\n",
+    "\n",
+    "### Deep dive into full app metadata\n",
+    "\n",
+    "Click on a record to dive deep into all of the details of your app stack and underlying LLM, captured by `tru_query_engine_recorder`.\n",
+    "\n",
+    "![Explore an App](https://www.trulens.org/Assets/image/Chain_Explore.png)\n",
+    "\n",
+    "If you prefer the raw format, you can quickly get it using the \"Display full app json\" or \"Display full record json\" buttons at the bottom of the page."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: Feedback functions evaluated in a deferred manner can be seen in the \"Progress\" page of the TruLens dashboard."
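+    ,"\n",
+    "\n",
+    "A minimal sketch of requesting deferred evaluation for this app (assuming `FeedbackMode` from `trulens_eval.schema` and `Tru.start_evaluator`; verify against your installed version):\n",
+    "\n",
+    "```python\n",
+    "from trulens_eval.schema import FeedbackMode\n",
+    "\n",
+    "tru_deferred = TruLlama(query_engine,\n",
+    "    app_id='LlamaIndex_App1',\n",
+    "    feedbacks=[f_lang_match, f_qa_relevance, f_qs_relevance],\n",
+    "    feedback_mode=FeedbackMode.DEFERRED)  # queue feedback instead of running inline\n",
+    "\n",
+    "tru.start_evaluator()  # background worker that evaluates queued feedback\n",
+    "```"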
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Or view results directly in your notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "7d153714b979d5e6d08dd8ec90712dd93bff2c9b6c1f0c118169738af3430cd4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

diff --git a/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb b/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb
new file mode 100644
index 000000000..634fd4724
--- /dev/null
+++ b/trulens_eval/examples/quickstart/colab/text2text_quickstart_colab.ipynb
@@ -0,0 +1,272 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -U trulens-eval\n",
+    "\n",
+    "# Google Colab Dependencies\n",
+    "!npm install localtunnel -q\n",
+    "# Quote the requirement so the shell does not treat '>=' as a redirect:\n",
+    "!pip install -q 'streamlit>=1.26.0'"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Text to Text Quickstart\n",
+    "\n",
+    "In this quickstart, you will create a simple text-to-text application and learn how to log it and get feedback.\n",
+    "\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/quickstart/text2text_quickstart.ipynb)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "### Add API keys\n",
+    "For this quickstart, you will need OpenAI and Hugging Face keys."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
+    "os.environ[\"HUGGINGFACE_API_KEY\"] = \"...\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import openai\n",
+    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Import from TruLens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import JSON\n",
+    "\n",
+    "# Imports main tools:\n",
+    "from trulens_eval import Feedback, Huggingface, Tru\n",
+    "tru = Tru()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create Simple Text to Text Application\n",
+    "\n",
+    "This example uses a bare-bones OpenAI LLM, and a non-LLM callable, just for demonstration purposes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def llm_standalone(prompt):\n",
+    "    return openai.ChatCompletion.create(\n",
+    "        model=\"gpt-3.5-turbo\",\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": \"You are a question and answer bot, and you answer super upbeat.\"},\n",
+    "            {\"role\": \"user\", \"content\": prompt}\n",
+    "        ]\n",
+    "    )[\"choices\"][0][\"message\"][\"content\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import hashlib\n",
+    "\n",
+    "def simple_hash_callable(prompt):\n",
+    "    # shake_256.hexdigest already returns a str, so no extra conversion is needed.\n",
+    "    h = hashlib.shake_256(prompt.encode('utf-8'))\n",
+    "    return h.hexdigest(20)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Send your first request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prompt_input = \"How good is language AI?\"\n",
+    "prompt_output = llm_standalone(prompt_input)\n",
+    "prompt_output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "simple_hash_callable(prompt_input)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialize Feedback Function(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize Huggingface-based feedback function collection class:\n",
+    "hugs = Huggingface()\n",
+    "\n",
+    "# Define a sentiment feedback function using HuggingFace.\n",
+    "f_sentiment = Feedback(hugs.positive_sentiment).on_output()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Instrument the callable for logging with TruLens"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from trulens_eval import TruBasicApp\n",
+    "tru_llm_standalone_recorder = TruBasicApp(llm_standalone, app_id=\"Happy Bot\", feedbacks=[f_sentiment])\n",
+    "tru_simple_hash_callable_recorder = TruBasicApp(simple_hash_callable, app_id=\"Hasher\", feedbacks=[f_sentiment])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with tru_llm_standalone_recorder as recording:\n",
+    "    tru_llm_standalone_recorder.app(prompt_input)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with tru_simple_hash_callable_recorder as recording:\n",
+    "    tru_simple_hash_callable_recorder.app(prompt_input)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Explore in a Dashboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru.run_dashboard() # open a local streamlit app to explore\n",
+    "\n",
+    "# tru.stop_dashboard() # stop if needed"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard."
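+    ,"\n",
+    "\n",
+    "For example, from a shell in this folder (assuming the `trulens_eval` package put the `trulens-eval` entry point on your PATH):\n",
+    "\n",
+    "```bash\n",
+    "trulens-eval\n",
+    "```"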
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Or view results directly in your notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}