diff --git a/README.md b/README.md index 9e5883c..ba65278 100644 --- a/README.md +++ b/README.md @@ -109,10 +109,10 @@ result = nlp_prompter.fit('ner.jinja', | Task Name | Colab Notebook | Status | |-------------|-------|-------| -| Named Entity Recognition | [NER Examples with GPT-3](https://colab.research.google.com/drive/16DUUV72oQPxaZdGMH9xH1WbHYu6Jqk9Q?usp=sharing) | ✅ | -| Multi-Label Text Classification | [Classification Examples with GPT-3](https://colab.research.google.com/drive/1gNqDxNyMMUO67DxigzRAOa7C_Tcr2g6M?usp=sharing) | ✅ | -| Multi-Class Text Classification | [Classification Examples with GPT-3](https://colab.research.google.com/drive/1gNqDxNyMMUO67DxigzRAOa7C_Tcr2g6M?usp=sharing) | ✅ | -| Binary Text Classification | [Classification Examples with GPT-3](https://colab.research.google.com/drive/1gNqDxNyMMUO67DxigzRAOa7C_Tcr2g6M?usp=sharing) | ✅ | +| Named Entity Recognition | [NER Examples with GPT-3](notebooks/NER%20Examples%20with%20GPT-3.ipynb) | ✅ | +| Multi-Label Text Classification | [Classification Examples with GPT-3](notebooks/Classification%20Examples%20with%20GPT-3.ipynb) | ✅ | +| Multi-Class Text Classification | [Classification Examples with GPT-3](notebooks/Classification%20Examples%20with%20GPT-3.ipynb) | ✅ | +| Binary Text Classification | [Classification Examples with GPT-3](notebooks/Classification%20Examples%20with%20GPT-3.ipynb) | ✅ | | Question-Answering | [QA Task Examples with GPT-3](https://colab.research.google.com/drive/1Yhl7iFb7JF0x89r1L3aDuufydVWX_VrL?usp=sharing) | ✅ | | Question-Answer Generation | [QA Task Examples with GPT-3](https://colab.research.google.com/drive/1Yhl7iFb7JF0x89r1L3aDuufydVWX_VrL?usp=sharing) | ✅ | | Relation-Extraction | [Relation-Extraction Examples with GPT-3](https://colab.research.google.com/drive/1iW4QNjllc8ktaQBWh3_04340V-tap1co?usp=sharing) | ✅ | diff --git a/notebooks/Classification Examples with GPT-3.ipynb b/notebooks/Classification Examples with GPT-3.ipynb new file mode 100644 index 0000000..a648a6d --- /dev/null +++ b/notebooks/Classification Examples with GPT-3.ipynb @@ -0,0 +1,655 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "gpuClass": "standard" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4HhNZ9PRTgxM" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!git clone https://github.com/promptslab/Promptify.git\n", + "!pip3 install openai tiktoken huggingface_hub" + ] + }, + { + "cell_type": "markdown", + "source": [ + "

Features 🚀

\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "k5vxlro8lAWu" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Define any LLM model (such as GPT-3) ✅" + ], + "metadata": { + "id": "oGti-CqqaoUw" + } + }, + { + "cell_type": "code", + "source": [ + "%cd /content/Promptify\n", + "\n", + "import json\n", + "import promptify.models\n", + "from promptify import OpenAI\n", + "from promptify import Prompter\n", + "from pprint import pprint\n", + "from IPython.display import Markdown, display\n", + "from IPython.core.display import display, HTML\n", + "\n", + "\n", + "# Define the API key for the OpenAI model\n", + "api_key = \"\"\n", + "\n", + "\n", + "# Create an instance of the OpenAI model, Currently supporting Openai's all model, In future adding more generative models from Hugginface and other platforms\n", + "model = OpenAI(api_key)\n", + "nlp_prompter = Prompter(model)\n", + "\n", + "\n", + "# Example sentence for demonstration\n", + "sent = \"The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\"\n", + "print(sent)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Q1VTSg8XUATO", + "outputId": "4ee14a50-eb2a-4a06-e93e-2fdce0becd06" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/Promptify\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 1: MultiLabel Text Classification Example in 2 Lines of code, with no training data required 🚀\n", + "\n" + ], + "metadata": { + "id": "NXwzgg9PbV0Z" + } + }, + { + "cell_type": "code", + "source": [ + "# MultiLabel Text Classification with No labels, no description, no oneshot, no examples\n", + "# Simple prompt with instructions\n", + "# domain name gives more info to model for better result generation, the parameter is optional\n", + "# Output will be python object -> [[{\"main class\": main classification category, \"1\": 1st level category, \"2\": 2nd level category, ...., \"branch\": sentence branch, \"group\": group of sentence}]]\n", + "\n", + "\n", + "result = nlp_prompter.fit('multilabel_classification.jinja',\n", + " n_output_labels = 5,\n", + " domain = 'clinical', # it could be any domain such as -> financial, education, biomedical etc\n", + " text_input = sent, \n", + " labels = None)\n", + "\n", + "# Output\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 385 + }, + "id": "h9UW8wnZVhXe", + "outputId": "1974d5dc-0ba9-4531-b656-83823b720a2d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "{'1': 'Chronic Conditions',\n", + " '2': 'Pain',\n", + " '3': 'Osteoporosis',\n", + " '4': 'Hypertension',\n", + " '5': 'Depression',\n", + " 'branch': 'Geriatrics',\n", + " 'group': 'Elderly',\n", + " 'main class': 'Medical'}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "###2 : MultiLabel Text Classification with Custom Classes 🚀\n", + "\n" + ], + "metadata": { + "id": "DQiklr0ugcLJ" + } + }, + { + "cell_type": "code", + "source": [ + "# Case : 2\n", + "# If want to perform MultiLabel Text Classification with custom classes only (hangling out-of-bounds prediction) prompt\n", + "\n", + "classes = ['Medicine','Oncology','Metastasis','Breast cancer','Lung cancer','Cerebrospinal fluid','Tumor microenvironment','Single-cell RNA sequencing','Idiopathic intracranial hypertension']\n", + "\n", + "result = nlp_prompter.fit('multilabel_classification.jinja',\n", + " n_output_labels = len(classes),\n", + " domain = 'clinical',\n", + " text_input = sent, \n", + " labels = classes)\n", + "\n", + "# Output\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 349 + }, + "id": "EObz_MTSebBv", + "outputId": "d8554121-453f-4baf-b433-f506d8cecc87" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "{'1': 'Geriatrics',\n", + " '2': 'Chronic Disease Management',\n", + " '3': 'Pain Management',\n", + " 'branch': 'Geriatrics',\n", + " 'group': 'Chronic Disease Management',\n", + " 'main class': 'Medicine'}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "###3 : MultiLabel Text Classification with One shot Example 🚀" + ], + "metadata": { + "id": "wShWgSBtixow" + } + }, + { + "cell_type": "code", + "source": [ + "# Case : 3\n", + "# If want to perform MultiLabel Text Classification wit one shot example adding by default\n", + "# Observe The changes in the model's output\n", + "# Output will be python object -> [[{\"main class\": main classification category, \"1\": 1st level category, \"2\": 2nd level category, ...., \"branch\": sentence branch, \"group\": group of sentence}]]\n", + "\n", + "one_shot = \"Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-derived CSF samples of idiopathic intracranial hypertension (IIH)\"\n", + "one_shot = [[one_shot, {'main class': 'Health', '1': 'Medicine', '2': 'Oncology', '3': 'Metastasis', '4': 'Breast cancer', '5': 'Lung cancer', '6': 'Cerebrospinal fluid', '7': 'Tumor microenvironment', '8': 'Single-cell RNA sequencing', '9': 'Idiopathic intracranial hypertension', 'branch': 'Health', 'group': 'Clinical medicine'}]]\n", + "\n", + "classes = ['Medicine','Oncology','Metastasis','Breast cancer','Lung cancer','Cerebrospinal fluid','Tumor microenvironment','Single-cell RNA sequencing','Idiopathic intracranial hypertension']\n", + "\n", + "\n", + "result = nlp_prompter.fit('multilabel_classification.jinja',\n", + " n_output_labels = len(classes),\n", + " domain = 'clinical',\n", + " text_input = sent,\n", + " examples = one_shot,\n", + " labels = classes)\n", + "# Output\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 421 + }, + "id": "59Ozbzh4iwrC", + "outputId": "af6ebe44-756b-4760-d463-a3ee84b994f6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[{'1': 'Medicine',\n", + " '2': 'Osteoporosis',\n", + " '3': 'Hypertension',\n", + " '4': 'Depression',\n", + " '5': 'Atrial fibrillation',\n", + " '6': 'Nausea and vomiting',\n", + " '7': 'Urinary tract infection',\n", + " 'branch': 'Health',\n", + " 'group': 'Clinical medicine',\n", + " 'main class': 'Health'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "###4 : MultiLabel Text Classification with some Domain Knowledge 🚀" + ], + "metadata": { + "id": "cLT5okKkhhbA" + } + }, + { + "cell_type": "code", + "source": [ + "#Case : 4\n", + "#If want to give some domain knowledge and description in prompt to enhance the output\n", + "\n", + "one_shot = \"Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-derived CSF samples of idiopathic intracranial hypertension (IIH)\"\n", + "one_shot = [[one_shot, {'main class': 'Health', '1': 'Medicine', '2': 'Oncology', '3': 'Metastasis', '4': 'Breast cancer', '5': 'Lung cancer', '6': 'Cerebrospinal fluid', '7': 'Tumor microenvironment', '8': 'Single-cell RNA sequencing', '9': 'Idiopathic intracranial hypertension', 'branch': 'Health', 'group': 'Clinical medicine'}]]\n", + "classes = ['Medicine','Oncology','Metastasis','Breast cancer','Lung cancer','Cerebrospinal fluid','Tumor microenvironment','Single-cell RNA sequencing','Idiopathic intracranial hypertension']\n", + "\n", + "result = nlp_prompter.fit('multilabel_classification.jinja',\n", + " n_output_labels = len(classes),\n", + " domain = 'clinical',\n", + " text_input = sent,\n", + " examples = one_shot,\n", + " description = \"Below Paragraph is from discharge summary of a patient. The Paragraph describes the condition and symptoms of patient.\",\n", + " labels = classes)\n", + "# Output\n", + "\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 421 + }, + "id": "C7_39E61fWn3", + "outputId": "d472b977-ab51-4d8d-ba04-50ee7e845649" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[{'1': 'Medicine',\n", + " '2': 'Osteoporosis',\n", + " '3': 'Hypertension',\n", + " '4': 'Depression',\n", + " '5': 'Atrial fibrillation',\n", + " '6': 'Nausea and vomiting',\n", + " '7': 'Urinary tract infection',\n", + " 'branch': 'Health',\n", + " 'group': 'Clinical medicine',\n", + " 'main class': 'Health'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### MultiClass Text Classification Example" + ], + "metadata": { + "id": "_ENbs83srWDj" + } + }, + { + "cell_type": "code", + "source": [ + "# Multiclass text classification example\n", + "\n", + "labels = {'surprise', 'neutral', 'hate', 'joy', 'worry', 'sadness'}\n", + "\n", + "\n", + "result = nlp_prompter.fit('multiclass_classification.jinja',\n", + " labels=labels,\n", + " text_input=\"Amazing customer service.\",\n", + " )\n", + "\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 260 + }, + "id": "0pYiH3OfrfdN", + "outputId": "4ff6c23b-5ffa-4a05-df24-99366728a568" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Amazing customer service.

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[{'C': 'joy'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Binary Text Classification Example" + ], + "metadata": { + "id": "QkEIEdx3sBaw" + } + }, + { + "cell_type": "code", + "source": [ + "# binary text classification example\n", + "\n", + "result = nlp_prompter.fit('binary_classification.jinja',\n", + " label_0=\"positive\",\n", + " label_1=\"negative\",\n", + " text_input=\"Amazing customer service.\",\n", + " model_name=\"text-davinci-003\")\n", + "\n", + "\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 260 + }, + "id": "1S7c2yNsrq_3", + "outputId": "ae0c3d9d-a075-4709-ca87-7c3c555953f6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Amazing customer service.

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[{'C': 'Positive'}]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Na7CbftMsWQZ" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/NER Examples with GPT-3.ipynb b/notebooks/NER Examples with GPT-3.ipynb new file mode 100644 index 0000000..905644e --- /dev/null +++ b/notebooks/NER Examples with GPT-3.ipynb @@ -0,0 +1,493 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "gpuClass": "standard" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4HhNZ9PRTgxM" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!git clone https://github.com/promptslab/Promptify.git\n", + "!pip3 install openai tiktoken huggingface_hub" + ] + }, + { + "cell_type": "markdown", + "source": [ + "

Features 🚀

\n", + "
    \n", + "
  • 🧙‍♀️ NER in 2 lines of code with no training data required
  • \n", + "
  • 🔨 Easily add one shot, two shot, or few shot examples to the prompt
  • \n", + "
  • ✌ Output always provided as a Python object (e.g. list, dictionary) for easy parsing and filtering
  • \n", + "
  • 💥 Custom examples and samples can be easily added to the prompt
  • \n", + "
  • 💰 Optimized prompts to reduce OpenAI token costs (coming soon)
  • \n", + "
\n", + "\n", + "\n" + ], + "metadata": { + "id": "k5vxlro8lAWu" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Define any LLM model (such as GPT-3) ✅" + ], + "metadata": { + "id": "oGti-CqqaoUw" + } + }, + { + "cell_type": "code", + "source": [ + "%cd /content/Promptify\n", + "\n", + "import json\n", + "import promptify.models\n", + "from promptify import OpenAI\n", + "from promptify import Prompter\n", + "from pprint import pprint\n", + "from IPython.display import Markdown, display\n", + "from IPython.core.display import display, HTML\n", + "\n", + "\n", + "# Define the API key for the OpenAI model\n", + "api_key = \"\"\n", + "\n", + "\n", + "# Create an instance of the OpenAI model, Currently supporting Openai's all model, In future adding more generative models from Hugginface and other platforms\n", + "model = OpenAI(api_key)\n", + "nlp_prompter = Prompter(model)\n", + "\n", + "\n", + "# Example sentence for demonstration\n", + "sent = \"The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\"\n", + "print(sent)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Q1VTSg8XUATO", + "outputId": "c1054acf-4e06-412d-deb4-b6e4ae891a64" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/Promptify\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 1: Named Entity Recognition (NER) Example in 2 Lines of code, with no training data required 🚀\n", + "\n" + ], + "metadata": { + "id": "NXwzgg9PbV0Z" + } + }, + { + "cell_type": "code", + "source": [ + "# Named Entity Recognition with No labels, no description, no oneshot, no examples\n", + "# Simple prompt with instructions\n", + "# domain name gives more info to model for better result generation, the parameter is optional\n", + "# Output will be python object -> [ {'E' : Entity Name, 'T': Type of Entity } ]\n", + "\n", + "\n", + "result = nlp_prompter.fit('ner.jinja',\n", + " domain = 'medical',\n", + " text_input = sent, \n", + " labels = None)\n", + "\n", + "# Output\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 403 + }, + "id": "h9UW8wnZVhXe", + "outputId": "a39acaf1-4521-4dfa-869d-dffe7780f6db" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[{'E': '93-year-old', 'T': 'Age'},\n", + " {'E': 'chronic right hip pain', 'T': 'Medical Condition'},\n", + " {'E': 'osteoporosis', 'T': 'Medical Condition'},\n", + " {'E': 'hypertension', 'T': 'Medical Condition'},\n", + " {'E': 'depression', 'T': 'Medical Condition'},\n", + " {'E': 'chronic atrial fibrillation', 'T': 'Medical Condition'},\n", + " {'E': 'severe nausea and vomiting', 'T': 'Symptom'},\n", + " {'E': 'urinary tract infection', 'T': 'Medical Condition'},\n", + " {'Branch': 'Internal Medicine', 'Group': 'Geriatrics'}]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# You can also return the output with start and end position of the tokens. \n", + "# For that use the output format like this -> [{'E': '93-year-old', 'T': 'Age', 'S' : Start Position, 'En' : End Position}]" + ], + "metadata": { + "id": "1dJ1zN1DxXn-" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "###2 : Named Entity Recognition (NER) with Custom Tags/Entities 🚀\n", + "\n" + ], + "metadata": { + "id": "DQiklr0ugcLJ" + } + }, + { + "cell_type": "code", + "source": [ + "# Case : 2\n", + "# If want to perform NER with custom tags only (hangling out-of-bounds prediction) prompt\n", + "\n", + "\n", + "result = nlp_prompter.fit('ner.jinja',\n", + " domain = 'medical',\n", + " text_input = sent, \n", + " labels = [\"SYMPTOM\", \"DISEASE\"])\n", + "\n", + "# Output\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 367 + }, + "id": "EObz_MTSebBv", + "outputId": "3b317e7e-f498-4cac-8f1b-dc5943a9eb0c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[{'E': 'nausea and vomiting', 'T': 'SYMPTOM'},\n", + " {'E': 'urinary tract infection', 'T': 'SYMPTOM'},\n", + " {'E': 'osteoporosis', 'T': 'DISEASE'},\n", + " {'E': 'hypertension', 'T': 'DISEASE'},\n", + " {'E': 'depression', 'T': 'DISEASE'},\n", + " {'E': 'chronic atrial fibrillation', 'T': 'DISEASE'},\n", + " {'branch': 'Medical', 'group': 'History'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "###3 : Named Entity Recognition (NER) with One shot Example 🚀" + ], + "metadata": { + "id": "wShWgSBtixow" + } + }, + { + "cell_type": "code", + "source": [ + "# Case : 3\n", + "# If want to perform NER wit one shot example adding by default\n", + "# Observe The changes in the model's output\n", + "# the example format should be -> [ [Text, [{'E' : Entity Name, 'T': Type of Entity }]] ]\n", + "\n", + "one_shot = \"Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-derived CSF samples of idiopathic intracranial hypertension (IIH)\"\n", + "one_shot = [[one_shot, [{'E': 'DISEASE', 'W': 'Leptomeningeal metastases'}, {'E': 'DISEASE', 'W': 'breast cancer'}, {'E': 'DISEASE', 'W': 'lung cancer'}, {'E': 'BIOMARKER', 'W': 'cerebrospinal fluid'}, {'E': 'DISEASE', 'W': 'tumour microenvironment'}, {'E': 'TEST', 'W': 'single-cell RNA sequencing'}, {'E': 'DISEASE', 'W': 'idiopathic intracranial hypertension'}]]]\n", + "\n", + "\n", + "result = nlp_prompter.fit('ner.jinja',\n", + " domain = 'medical',\n", + " text_input = sent,\n", + " examples = one_shot,\n", + " labels = [\"SYMPTOM\", \"DISEASE\"])\n", + "\n", + "\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 367 + }, + "id": "59Ozbzh4iwrC", + "outputId": "0418753d-cc14-4f45-ab12-c9090154c818" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[[{'E': 'SYMPTOM', 'W': 'chronic right hip pain'},\n", + " {'E': 'DISEASE', 'W': 'osteoporosis'},\n", + " {'E': 'DISEASE', 'W': 'hypertension'},\n", + " {'E': 'SYMPTOM', 'W': 'depression'},\n", + " {'E': 'DISEASE', 'W': 'chronic atrial fibrillation'},\n", + " {'E': 'SYMPTOM', 'W': 'severe nausea and vomiting'},\n", + " {'E': 'DISEASE', 'W': 'urinary tract infection'}]]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "###5 : Named Entity Recognition (NER) with some Domain Knowledge 🚀" + ], + "metadata": { + "id": "cLT5okKkhhbA" + } + }, + { + "cell_type": "code", + "source": [ + "#Case : 5\n", + "#If want to give some domain knowledge and description in prompt to enhance the output\n", + "\n", + "result = nlp_prompter.fit('ner.jinja',\n", + " domain = 'clinical',\n", + " text_input = sent,\n", + " examples = one_shot,\n", + " description = \"Below Paragraph is from discharge summary of a patient. The Paragraph describes the condition and symptoms of patient.\",\n", + " labels = [\"SYMPTOM\", \"DISEASE\"])\n", + "\n", + "pprint(eval(result['text']))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 367 + }, + "id": "C7_39E61fWn3", + "outputId": "00027bb8-cb0b-470d-af6d-f571e7006f8c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Sentence

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n", + "\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "

Output

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "[[{'E': 'SYMPTOM', 'W': 'chronic right hip pain'},\n", + " {'E': 'DISEASE', 'W': 'osteoporosis'},\n", + " {'E': 'DISEASE', 'W': 'hypertension'},\n", + " {'E': 'SYMPTOM', 'W': 'depression'},\n", + " {'E': 'DISEASE', 'W': 'chronic atrial fibrillation'},\n", + " {'E': 'SYMPTOM', 'W': 'severe nausea and vomiting'},\n", + " {'E': 'DISEASE', 'W': 'urinary tract infection'}]]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "cgZU6TA1ZSpH" + }, + "execution_count": null, + "outputs": [] + } + ] +}