diff --git a/.github/ISSUE_TEMPLATE/1_bug_report.yaml b/.github/ISSUE_TEMPLATE/1_bug_report.yaml new file mode 100644 index 00000000..7657b0cd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_bug_report.yaml @@ -0,0 +1,87 @@ +name: Report a bug +description: Any errors that you encounter. +labels: ['bug'] +body: + - type: markdown + attributes: + value: > + Before you go any further. Is this really a **πŸ› bug**? + + If it's a question about how AdalFlow works, have a look at our [AdalFlow documentation](https://adalflow.sylph.ai/) + or ask a question on our [Community Discord](https://discord.gg/ezzszrRZvT). + + - type: textarea + attributes: + label: Bug description + description: A description of the πŸ›bugπŸ›. + placeholder: | + A clear and concise description of what the bug is. + + Include steps to reproduce, the expected behaviour, and the actual behaviour. + + ``` + The error message you got, with the full traceback if available. + ``` + validations: + required: true + + - type: textarea + id: versions + attributes: + label: What version are you seeing the problem on? + description: Use `adalflow.__version__` to get the version if it's via pip or "main branch" if you're using the latest code. + render: python + validations: + required: false + + - type: markdown + attributes: + value: '**Note: The rest of this form is optional, but filling it out may help us to provide better support.**' + + - type: textarea + attributes: + label: How to reproduce the bug + description: > + Provide steps and example code here. + You can also paste a link to Google Colab. + ```python + # Sample code to reproduce the problem + ``` + render: python + validations: + required: false + + - type: textarea + attributes: + label: Error messages and logs + description: > + Provide any error messages and/or logs + placeholder: '# Copy the complete error messages and logs' + value: | + ``` + # Error messages and logs here please + ``` + validations: + required: false + + - type: textarea + attributes: + label: Environment + description: | + Please provide details about your environment, including the following: + - OS (e.g., Linux, Windows, macOS) + value: | + - OS: [e.g., Linux, Windows, macOS] + validations: + required: false + + - type: textarea + attributes: + label: More info + description: Add any other info about the issue here. + validations: + required: false + + - type: markdown + attributes: + value: '**Happy engineering!**' diff --git a/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml b/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml new file mode 100644 index 00000000..a9c471fd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml @@ -0,0 +1,27 @@ +name: Improvement suggestion +description: Suggest an improvement, a code refactor, or deprecation +labels: ['[adalflow] improvement'] +body: + - type: textarea + attributes: + label: Outline & Motivation + description: A clear and concise description of the improvement suggestion + placeholder: | + Please outline the motivation for the proposal. + If this is related to another GitHub issue, please link it here + validations: + required: true + + - type: textarea + attributes: + label: Pitch + description: A clear and concise description of what you want to happen. + validations: + required: false + + - type: textarea + attributes: + label: Additional context + description: Add any other context or screenshots here. 
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/3_feature_request.yaml b/.github/ISSUE_TEMPLATE/3_feature_request.yaml
new file mode 100644
index 00000000..c11f05f2
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3_feature_request.yaml
@@ -0,0 +1,33 @@
+name: Feature request
+description: Propose a feature for this project
+labels: ["[adalflow] new feature request"]
+body:
+  - type: textarea
+    attributes:
+      label: Description & Motivation
+      description: A clear and concise description of the feature proposal
+      placeholder: |
+        Please outline the motivation for the proposal.
+        Is your feature request related to a problem? e.g., I'm always frustrated when [...].
+        If this is related to another GitHub issue, please link it here
+
+  - type: textarea
+    attributes:
+      label: Pitch
+      description: A clear and concise description of what you want to happen.
+    validations:
+      required: false
+
+  - type: textarea
+    attributes:
+      label: Alternatives
+      description: A clear and concise description of any alternative solutions or features you've considered, if any.
+    validations:
+      required: false
+
+  - type: textarea
+    attributes:
+      label: Additional context
+      description: Add any other context or screenshots about the feature request here.
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/4_documenting.yaml b/.github/ISSUE_TEMPLATE/4_documenting.yaml
new file mode 100644
index 00000000..e5b77b0a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/4_documenting.yaml
@@ -0,0 +1,16 @@
+name: Typos and doc fixes
+description: Tell us how we can improve our documentation and Google Colab/ipynb notebooks.
+labels: ["documentation"]
+body:
+  - type: textarea
+    attributes:
+      label: πŸ“š Documentation
+      description: A description of the documentation issue.
+      placeholder: |
+        For typos and doc fixes, please go ahead and:
+
+        - For a simple typo or fix, please send a PR directly (no need to create an issue).
+        - If you are not sure about the proper solution, please describe your findings here.
+
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml b/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml
new file mode 100644
index 00000000..819dbd6e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml
@@ -0,0 +1,32 @@
+name: New integration proposal
+description: Propose a new integration for this project, such as a database, retriever, or model client. We highly recommend finding a POC from the provider team to work together on this.
+labels: ['[adalflow] integration']
+body:
+  - type: textarea
+    attributes:
+      label: Description & Motivation
+      description: A clear and concise description of the integration proposal
+      placeholder: |
+        Please outline the motivation for the proposal.
+
+
+  - type: textarea
+    attributes:
+      label: Pitch
+      description: A clear and concise description of what you want to happen.
+    validations:
+      required: false
+
+  - type: textarea
+    attributes:
+      label: Provider POC
+      description: If you have a POC from the provider team, please provide it here.
+    validations:
+      required: false
+
+  - type: textarea
+    attributes:
+      label: Additional context
+      description: Add any other context or screenshots about the integration request here.
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/6_suggest_usecases_benchmarks.yaml b/.github/ISSUE_TEMPLATE/6_suggest_usecases_benchmarks.yaml new file mode 100644 index 00000000..ea93a39e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/6_suggest_usecases_benchmarks.yaml @@ -0,0 +1,32 @@ +name: Suggest use cases and benchmarks +description: Propose new use cases that AdalFlow should support or benchmarks that we should compare against +labels: ["new use cases/benchmarks"] +body: + - type: textarea + attributes: + label: Description & Motivation + description: A clear and concise description of the new use case or benchmark proposal + placeholder: | + Please outline the motivation for the proposal. + + + - type: textarea + attributes: + label: Pitch + description: A clear and concise description of what you want to happen. + validations: + required: false + + - type: textarea + attributes: + label: Alternatives + description: A clear and concise description of any alternative solutions or features you've considered, if any. + validations: + required: false + + - type: textarea + attributes: + label: Additional context + description: Add any other context or screenshots about the feature request here. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index dd84ea78..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: '' -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. See error - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. - -**Desktop (please complete the following information):** - - OS: [e.g. iOS] - - Browser [e.g. chrome, safari] - - Version [e.g. 22] - -**Smartphone (please complete the following information):** - - Device: [e.g. iPhone6] - - OS: [e.g. iOS8.1] - - Browser [e.g. stock browser, safari] - - Version [e.g. 22] - -**Additional context** -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..065ad099 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,14 @@ +blank_issues_enabled: false +contact_links: + - name: πŸ‘ Upvote an issue + url: https://github.com/SylphAI-Inc/AdalFlow/issues + about: You should upvote an issue if it is important to you. + - name: πŸ’¬ Chat with us + url: https://discord.gg/ezzszrRZvT + about: Live chat with experts, engineers, and users in our Discord community. + - name: πŸ“– Read the documentation + url: http://adalflow.sylph.ai/ + about: Please consult the documentation before opening any issues! + # - name: πŸ™‹ Contact us about professional services + # url: https://lightning.ai + # about: Contact the Lightning.ai sales team for paid support. diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md deleted file mode 100644 index 48d5f81f..00000000 --- a/.github/ISSUE_TEMPLATE/custom.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Custom issue template -about: Describe this issue template's purpose here. 
-title: '' -labels: '' -assignees: '' - ---- - - diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index bbcbbe7d..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: '' -labels: '' -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..11b27377 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,38 @@ +## What does this PR do? + + + +Fixes #\ + + + +
+**Before submitting**
+
+- [ ] Was this **discussed/agreed** via a GitHub issue? (not for typos and docs)
+- [ ] Did you read the [contributor guideline](https://adalflow.sylph.ai/contributor/index.html)?
+- [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
+- [ ] Did you make sure to **update the documentation** with your changes? (if necessary)
+- [ ] Did you write any **new necessary tests**? (not for typos and docs)
+- [ ] Did you verify new and **existing tests pass** locally with your changes?
+- [ ] Did you list all the **breaking changes** introduced by this pull request?
+
+ + + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ecf5b070..6f85a6c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,13 +14,24 @@ repos: hooks: - id: black args: ['--line-length=88'] + exclude: ^docs/|.*\.(json|yaml|md|txt)$ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.4.2 hooks: # Run the linter. - id: ruff - args: ['--fix', '--extend-ignore=E402'] + args: ['--fix'] + exclude: ^docs/|.*\.(json|yaml|md|txt)$ + + # Add local hooks to run custom commands + - repo: local + hooks: + - id: run-make-format + name: Run Make Format + entry: make format + language: system + pass_filenames: false # - repo: https://github.com/pycqa/flake8 # rev: 4.0.1 # hooks: diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..3670e02f --- /dev/null +++ b/Makefile @@ -0,0 +1,51 @@ +# Define variables for common directories and commands +PYTHON = poetry run +SRC_DIR = . + +# Default target: Show help +.PHONY: help +help: + @echo "Available targets:" + @echo " setup Install dependencies and set up pre-commit hooks" + @echo " format Run Black and Ruff to format the code" + @echo " lint Run Ruff to check code quality" + @echo " test Run tests with pytest" + @echo " precommit Run pre-commit hooks on all files" + @echo " clean Clean up temporary files and build artifacts" + +# Install dependencies and set up pre-commit hooks +.PHONY: setup +setup: + poetry install + poetry run pre-commit install + +# Format code using Black and Ruff +.PHONY: format +format: + $(PYTHON) black $(SRC_DIR) + git ls-files | xargs pre-commit run black --files + +# Run lint checks using Ruff +.PHONY: lint +lint: + $(PYTHON) ruff check $(SRC_DIR) + +# Run all pre-commit hooks on all files +.PHONY: precommit +precommit: + $(PYTHON) pre-commit run --all-files + +# Run tests +.PHONY: test +test: + $(PYTHON) pytest + +# Clean up temporary files and build artifacts +.PHONY: clean +clean: + rm -rf .pytest_cache + rm -rf .mypy_cache + rm -rf __pycache__ + rm -rf build dist *.egg-info + find . -type d -name "__pycache__" -exec rm -r {} + + find . -type f -name "*.pyc" -delete diff --git a/README.md b/README.md index a9dab385..61e411fb 100644 --- a/README.md +++ b/README.md @@ -33,11 +33,16 @@ Trainer & Optimizers

-

PyPI Version + + PyPI Downloads + + + PyPI Downloads + GitHub stars @@ -71,8 +76,21 @@ For AI researchers, product teams, and software engineers who want to learn the +# Quick Start +Install AdalFlow with pip: + +```bash +pip install adalflow +``` + +Please refer to the [full installation guide](https://adalflow.sylph.ai/get_started/installation.html) for more details. + + +* Try the [Building Quickstart](https://colab.research.google.com/drive/1TKw_JHE42Z_AWo8UuRYZCO2iuMgyslTZ?usp=sharing) in Colab to see how AdalFlow can build the task pipeline, including Chatbot, RAG, agent, and structured output. +* Try the [Optimization Quickstart](https://colab.research.google.com/github/SylphAI-Inc/AdalFlow/blob/main/notebooks/qas/adalflow_object_count_auto_optimization.ipynb) to see how AdalFlow can optimize the task pipeline. + # Why AdalFlow @@ -106,6 +124,8 @@ Here is an optimization demonstration on a text classification task: Among all libraries, AdalFlow achieved the highest accuracy with manual prompting (starting at 82%) and the highest accuracy after optimization. + + Further reading: [Optimize Classification](https://adalflow.sylph.ai/use_cases/classification.html) ## Light, Modular, and Model-Agnostic Task Pipeline @@ -122,6 +142,14 @@ You have full control over the prompt template, the model you use, and the outpu AdalFlow Task Pipeline
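+Below is a minimal sketch of such a pipeline. It follows the `Generator` API from our docs, but treat it as illustrative: the template, model name, and question are placeholders to adapt to your setup (an `OPENAI_API_KEY` must be set in your environment):
+
+```python
+import adalflow as adal
+from adalflow.components.model_client import OpenAIClient
+
+qa = adal.Generator(
+    model_client=OpenAIClient(),  # swap in another client without changing the pipeline
+    model_kwargs={"model": "gpt-3.5-turbo"},
+    template=r"<SYS> You are a helpful assistant. </SYS> User: {{input_str}}",
+)
+
+output = qa(prompt_kwargs={"input_str": "What is AdalFlow?"})
+print(output.data)  # the response text lives on GeneratorOutput.data
+```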

+Many providers and models are accessible via the same interface:
+
+

+ AdalFlow Model Providers +

+ +[All available model providers](https://adalflow.sylph.ai/apis/components/components.model_client.html) + @@ -175,23 +177,7 @@ make html And you will be able to find the newly added use_cases module. -### Add New Docs - -If you want to add any written files such as README.md to the documentation, there is an easy way to transform the files to `.rst` files using `Pandoc`. - -- First, install Pandoc with Homebrew: - - `brew install pandoc` -- Then run `pandoc -s -o `. For example, in the root directory run `pandoc -s README.md -o docs/source/get_started/introduction.rst`.This command will take content from `README.md` and create an `introduction.rst` file in the specified directory. - -After editing, run - -```python -cd docs -make clean -make html -``` ### Commit the Edited Documentation @@ -199,7 +185,7 @@ Remember to exclude any unnecessary files in `.gitignore`. Please don’t commit Please push your updates to the GitHub repo. -The structure of the code base and the docs: +The structure of the docs directory looks like this: ``` AdalFlow/ @@ -227,12 +213,25 @@ AdalFlow/ β”‚ β”œβ”€β”€ conf.py β”‚ β”œβ”€β”€ index.rst β”‚ β”œβ”€β”€ Makefile -β”œβ”€β”€ core/ -β”‚ β”œβ”€β”€ __init__.py -β”‚ β”œβ”€β”€ module1.py -β”‚ β”œβ”€β”€ module2.py -β”œβ”€β”€ components/ -β”‚ β”œβ”€β”€ __init__.py -β”‚ β”œβ”€β”€ module1.py -β”‚ β”œβ”€β”€ module2.py +β”‚ β”œβ”€β”€ pyproject.toml +β”‚ β”œβ”€β”€ poetry.lock +``` + + +## [Optional] Convert Markdown to reStructuredText + +If you want to add any written files such as README.md to the documentation, there is an easy way to transform the files to `.rst` files using `Pandoc`. + +- First, install Pandoc with Homebrew: + + `brew install pandoc` + +- Then run `pandoc -s -o `. For example, in the root directory run `pandoc -s README.md -o docs/source/get_started/introduction.rst`.This command will take content from `README.md` and create an `introduction.rst` file in the specified directory. 
+ +After editing, run + +```python +cd docs +make clean +make html ``` diff --git a/docs/requirements.txt b/docs/requirements.txt index e59cca03..14ce30d7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,4 +8,4 @@ PyYAML readthedocs-sphinx-search==0.3.2 numpy tqdm -tiktoken \ No newline at end of file +tiktoken diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index 95445edc..73ee6a04 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -344,4 +344,4 @@ table tr:hover { font-size: 8px; /* Further adjust text size for smallest screens */ } -} \ No newline at end of file +} diff --git a/docs/source/_static/images/adalflow_code_samples.png b/docs/source/_static/images/adalflow_code_samples.png new file mode 100644 index 00000000..9a3bf3c2 Binary files /dev/null and b/docs/source/_static/images/adalflow_code_samples.png differ diff --git a/docs/source/_static/images/adalflow_files.png b/docs/source/_static/images/adalflow_files.png new file mode 100644 index 00000000..bcffa12b Binary files /dev/null and b/docs/source/_static/images/adalflow_files.png differ diff --git a/docs/source/_static/images/adalflow_issue_template.png b/docs/source/_static/images/adalflow_issue_template.png new file mode 100644 index 00000000..9a85ec44 Binary files /dev/null and b/docs/source/_static/images/adalflow_issue_template.png differ diff --git a/docs/source/_static/images/adalflow_issues.png b/docs/source/_static/images/adalflow_issues.png new file mode 100644 index 00000000..527c3a0f Binary files /dev/null and b/docs/source/_static/images/adalflow_issues.png differ diff --git a/docs/source/_static/images/multi-providers.png b/docs/source/_static/images/multi-providers.png new file mode 100644 index 00000000..291b69f3 Binary files /dev/null and b/docs/source/_static/images/multi-providers.png differ diff --git a/docs/source/_static/images/pr_draft.png b/docs/source/_static/images/pr_draft.png new file mode 100644 index 00000000..38eca9a5 Binary files /dev/null and b/docs/source/_static/images/pr_draft.png differ diff --git a/docs/source/apis/components/index.rst b/docs/source/apis/components/index.rst index 893e7483..fce07dc1 100644 --- a/docs/source/apis/components/index.rst +++ b/docs/source/apis/components/index.rst @@ -49,6 +49,7 @@ Output Parsers .. autosummary:: components.output_parsers.outputs + components.output_parsers.dataclass_parser Agent ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/apis/index.rst b/docs/source/apis/index.rst index 13d19ff9..7c124c74 100644 --- a/docs/source/apis/index.rst +++ b/docs/source/apis/index.rst @@ -1,3 +1,5 @@ +.. _apis: + API Reference ============= diff --git a/docs/source/contributor/contribute_to_code.rst b/docs/source/contributor/contribute_to_code.rst index 86ebaea7..1fe1ca6e 100644 --- a/docs/source/contributor/contribute_to_code.rst +++ b/docs/source/contributor/contribute_to_code.rst @@ -1,11 +1,12 @@ -Contribute to Code +Development Essentials ====================================== -This document will cover how you can contribute to lightRAG codebase. +This document will cover how you can set up the AdalFlow codebase and start coding, testing, and documentation. Set Up ^^^^^^^^^^^^^^^^^^^ -The current ``LightRAG`` code contribution supports `poetry `_ setup only. The team is working on optimizing the library and will get back to support more environment soon. -If you are only interested in using ``LightRAG`` as a package, please check our `installation guide `_. 
+We mainly use `poetry `_ for dependency management and virtual environment setup. + +.. If you are only interested in using ``LightRAG`` as a package, please check our `installation guide `_. To set up ``poetry`` and contribute, please check the following steps: @@ -13,11 +14,45 @@ To set up ``poetry`` and contribute, please check the following steps: .. code-block:: bash - git clone https://github.com/SylphAI-Inc/LightRAG - cd LightRAG + git clone https://github.com/SylphAI-Inc/AdalFlow + cd AdalFlow + +2. **Set Up the AdalFlow Dev Environment:** + The AdalFlow source code, tests, and dependencies are in the ``./adalflow`` directory. + The ``./adalflow/pyproject.toml`` controls the dependencies for the ``adalflow`` package. + Use Poetry to install the dependencies and set up the virtual environment: + + .. code-block:: bash + + cd adalflow + poetry install + poetry shell + + Test the setup by running the tests at the ``./adalflow`` directory: + + .. code-block:: bash + + pytest tests + +3. **Set Up the Root Dev Environment:** + At the root directory, we have a ``pyproject.toml`` file that controls the dependencies for the root directory. + + .. code-block:: bash + + poetry install + poetry shell + + This will install all relevant dependencies and the files in /use_cases, /tutorials, and /benchmarks will be using the development version of the ``adalflow`` package. + You should see output similar to the following: + + .. code-block:: bash + + - Installing adalflow (0.2.5 /Users/liyin/Documents/test/AdalFlow/adalflow) -2. **Configure API Keys:** + + +4. **[Optional] Configure API Keys in the Root Directory:** Copy the example environment file and add your API keys: .. code-block:: bash @@ -31,50 +66,46 @@ To set up ``poetry`` and contribute, please check the following steps: # COHERE_API_KEY=YOUR_API_KEY_IF_YOU_USE_COHERE # HF_TOKEN=YOUR_API_KEY_IF_YOU_USE_HF -3. **Install Dependencies:** - - The ``./lightrag/pyproject.toml`` controls the dependencies for the ``LightRAG`` package. - Use Poetry to install the dependencies and set up the virtual environment: + This will be helpful for you to run tutorials, use cases, and benchmarks. - .. code-block:: bash - cd lightrag - poetry install - poetry shell -Codebase Structure +Coding ^^^^^^^^^^^^^^^^^^^ -It is recommended to check our `LightRAG codebase structure `_ and current `API references `_ to familiarize yourself with the directories and paths before contributing. +Structuring +~~~~~~~~~~~~~~~ +It is recommended to check our the structuring in :ref:`part1-structuring` and :doc:`../apis/index` +to understand the codebase structure. -Code Examples -^^^^^^^^^^^^^^^^^^^ -We want to support you with our best. We have included code samples in the `tutorial `_ for you to refer to. +What to code +~~~~~~~~~~~~~~~ +Please check the :ref:`part3-contributing-steps` to see some coding examples and steps to contribute to the codebase. -We inlcude a list of potential samples(`We are working in progress to add more`): +Code Tips +~~~~~~~~~~~~~~~ +* Please follow the `Google Python Style Guide `_. -- `ModelClient integration `_. This document will help if you want to add new models not included in our codebase. -- `Retriever Integration `_. We provide different retrivers but you can create more. +* Functions and classes should include standard docstrings and comments. Please refer to `documentation contribution guidelines <./contribute_to_document.html>`_ for standard docstrings. 
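+
+For reference, a minimal Google-style docstring might look like this sketch (the function itself is illustrative, not part of the library):
+
+.. code-block:: python
+
+   def normalize_scores(scores: list, eps: float = 1e-8) -> list:
+       r"""Normalize a list of non-negative scores so they sum to 1.
+
+       Args:
+           scores (list): Raw, non-negative scores.
+           eps (float, optional): Small constant to avoid division by zero. Defaults to 1e-8.
+
+       Returns:
+           list: The normalized scores.
+       """
+       total = sum(scores) + eps
+       return [s / total for s in scores]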
-Code Tips -^^^^^^^^^^^^^^^^^^^ -* When writing code, it is appreciated to include any important docstrings and comments. Please refer to `documentation contribution guidelines <./contribute_to_document.html>`_ for standard docstrings. -* LightRAG is a Python library and if you could follow the `Google Python Style Guide `_, the codebase will be more consistent. +Copilot +~~~~~~~~~~~~~~~ +We suggest you use `GitHub Copilot `_ to help you write code faster and more efficiently. +You can follow this `Guide `_ to set it up with your IDE. +There are other options like `Cursor `_ and `Tabnine `_ that you can use as well. Dependencies -^^^^^^^^^^^^^^^^^^^ -If you want to add any new dependencies to the package, please include them in your PR description to inform us. -Since we have already set up the testing automatic workflow in GitHub, please also set your new dependencies in -``./lightrag/pyproject.toml`` file ``[tool.poetry.group.test.dependencies]`` section to avoid dependency errors in our CI/CD workflow. - -In order to correctly add the dependency using ``poetry``, please run +~~~~~~~~~~~~~~~ +1. If you want to add any new dependencies to the package, please include them in your PR description to inform us. +2. Since we have already set up the testing automatic workflow in GitHub, please also set your new dependencies in ``./adalflow/pyproject.toml`` file ``[tool.poetry.group.test.dependencies]`` section to avoid dependency errors in our CI/CD workflow. + In order to correctly add the dependency using ``poetry``, please run -.. code-block:: bash + .. code-block:: bash - poetry add --group test + poetry add --group test Testing ^^^^^^^^^^^^^^^^^^^ -After you update the code, please make sure your code is well tested before making a pull request. -There is a ``./lightrag/tests`` folder in the project directory to host your unit testing cases. +After you update the code, please make sure your code is well tested before making a pull request. +There is a ``./adalflow/tests`` folder in the project directory to host your unit testing cases. You might need to install the testing packages using ``poetry``: @@ -82,15 +113,27 @@ For example: .. code-block:: bash - poetry add --group test unittest - poetry add --group test pytest - poetry add --group test mypy + poetry install # or + poetry add --group test + +You should name your test files with the following format: ``test_.py``. -All the test scripts should start with ``test_``. For example, run the individual test for ``components`` with: +Activate the virtual environment from `./adalflow` and run the tests: .. code-block:: bash - python lightrag/tests/test_components.py + poetry shell + pytest +To run a specific test file, you can use the following command: +.. code-block:: bash + + pytest tests/test_components.py + +For more details on testing, please refer to the `README.md `_ under the ``./adalflow/tests`` directory. + +Documentation +^^^^^^^^^^^^^^^^^^^ +Please refer to the `README.md `_ under the ``./docs`` directory for more details on how to contribute to the documentation. diff --git a/docs/source/contributor/contribute_to_document.rst b/docs/source/contributor/contribute_to_document.rst deleted file mode 100644 index 3b1516cf..00000000 --- a/docs/source/contributor/contribute_to_document.rst +++ /dev/null @@ -1,115 +0,0 @@ -Contribute to Documentation -=============================================== - -.. contents:: - :local: - :depth: 2 - -.. _Documentation Contribution: - -- **User-Facing Documentation**: Found on the main docs site. 
These include tutorials, guides, and usage documentation meant for end users. -- **Developer Documentation**: Located within the repository's READMEs and the ``docs/`` directory. These documents are more technical and intended for contributors and maintainers. - -This section is about user-facing documentation. - -LightRAG uses `Sphinx `_ for documentation, leveraging both `reStructuredText `_ and Sphinx's `autodoc `_ feature to pull docstrings from code and organize them through ``.rst`` files. Our documentation is split into: - -Souce Code Docstring Standard -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sphinx automatically pulls docstrings from source code and uses them as the docs in API reference. For clarity and consistency, we have a standard for all the code contributors. - -Aligning with Pytorch, LightRAG uses the `Google style with Sphinx `_ for formatting docstrings `(detailed styles) `_, emphasizing **docstring** and **type control** to guarantee the document and code quality. - - -Setup & Build Documentation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**1. Clone the GitHub Project** - -.. code-block:: bash - - git clone https://github.com/SylphAI-Inc/LightRAG.git - -**2. Install Necessary Packages** - -LightRAG's documentation style is `pydata_sphinx_theme `_. - -.. Install by ``pip``: - -.. .. code-block:: bash - -.. cd docs -.. pip install -r requirements.txt - -Install by ``poetry`` along with all other dependencies for LightRAG: - -.. code-block:: bash - - poetry install - -**3. Build the Documentation** - -.. code-block:: bash - - cd docs - make html - - -**conf.py** - -This file (``docs/source/conf.py``) contains configurations used by Sphinx, including extensions, templates, HTML theme, and language settings. - -**Source Code Doc-string** - -Follow `Google style docstrings `_ to update your source code docstrings. Limit lines to **80** characters for better readability in various environments. - -**RST Files**: Directly edit ``.rst`` files for broader changes or new sections. Use the ``.. toctree::`` directive to link documents. - -The ``.rst`` files are in the ``docs/source``. The majority of ``.rst`` files in the ``docs/source/apis`` are generated automatically from the Python code docstrings using ``sphinx-apidoc``. - -To shorten the doc generating process, please remove the files that is not included in your project. - -The Sphinx build will show warnings but the docs will still be completed. - -If you have a module folder containing code, for example, ``components/``, please add the following line to the ``docs/Makefile`` in the ``apidoc:`` section. - -.. code-block:: bash - - @sphinx-apidoc -o $(APIDOCOUTDIR)/components ../components --separate --force - - -**4. View the Documentation Locally** - -After building, open ``docs/build/html/index.html`` in a web browser. If you face issues with local resources, such as the browser prohibits loading the web page correctly, run a local server: - -.. code-block:: bash - - cd docs/build - python -m http.server 8000 - -Then navigate to the corresbonding site in your browser. E.g. it can be `http://127.0.0.1:8000/`. - - - -Adding Documentation Tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To ensure the documentation remains up-to-date, LightRAG uses Sphinx's Doctest extension. Add ``.. testcode::`` to your ``.rst`` files or docstrings and run ``make doctest`` to test your documentation snippets. - -To manually run these tests, run: - -.. 
code-block:: bash - - cd docs - make doctest - - -Documentation Dependencies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If your documentation requires any new dependencies, please include it in the ``pyproject.toml`` under the root directory, include it in your PR description and let us know. - -Commit Changes -~~~~~~~~~~~~~~~~~~~~~~~~~ - -After making changes, commit the ``.rst`` and source files, avoiding the ``docs/build`` directory, and push them to your GitHub fork for review. - diff --git a/docs/source/contributor/contribution.rst b/docs/source/contributor/contribution.rst index 5e5485f2..f1c0394c 100644 --- a/docs/source/contributor/contribution.rst +++ b/docs/source/contributor/contribution.rst @@ -1,63 +1,473 @@ -LightRAG Contribution Guide +Contributing Process ======================================= -``LightRAG``'s contribution process is similar to most open source projects on GitHub. We encourage new project ideas and the communication between ``LightRAG`` team, developers and the broader community. -Please don't forget to join us on `Discord `_. +Welcome to the AdalFlow community! -Contribution Process ----------------------------- -You are always welcomed to contribute even if you've never participated in open source project before. -Here is the basic contribution process: +We tried to make the process simple and clear, but it can always improve. +Share your suggestions on `Discord `_ or `Github Discussion `_. -Environment + +Quick Start ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When contributing, please note: -LightRAG separates the source code environment and documentation environment. +1. The `Github issues `_ is the best place to find your first task. Look for tasks labeled `good first issue`. +2. Follow the :doc:`./contribute_to_code` to set up your environment and start coding, testing, and documenting. +3. Review the `PR & Review Process <#pr-review-process>`_ to complete the review and iteration process. + We aim to maximize both your learning and the library's quality. + -* To activate the code environment, you should run ``poetry install`` and ``poetry shell`` under ``./lightrag``. The ``./lightrag/pyproject.toml`` contains the dependencies for the ``LightRAG`` package. +.. note:: -* To activate the documentation environment, you can run ``poetry install`` and ``poetry shell`` under ``.``. The ``./pyproject.toml`` controls documentation dependencies. + You can use πŸ‘ to show that you want a particular issue to be addressed. -Find a direction to work on +.. _part1-structuring: +Part 1: Structuring ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The team builds ``LightRAG`` based on latest researches and product cases. But you might have your own task to apply ``LightRAG``. -Therefore, you can extend ``LightRAG`` and add any new features you believe will solve yours or others' problems. -If you don't have any idea yet, you can: +To dive deeper, we’ll explain our repository structure, issue tracking, and label system. + +.. what to contribute(with examples), contributing steps with proposal/discussion/coding/testing/documentation/pr/review process. +.. The coding and testing will be discussed more in details in `Code Contribution Guidelines <./contribute_to_code.html>`_ and the documentation will be discussed in `Documentation Contribution Guidelines <./contribute_to_document.html>`_. + +Repo Structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Our repository has a clear structure, divided into six subdirectories: + +.. code-block:: text + + . 
+ β”œβ”€β”€ .github/ + β”œβ”€β”€ adalflow/ + β”‚ β”œβ”€β”€ adalflow/ + β”‚ β”œβ”€β”€ tests/ + | β”œβ”€β”€ pyproject.toml + β”œβ”€β”€ docs/ + | |── pyproject.toml + β”œβ”€β”€ tutorials/ + β”œβ”€β”€ use_cases/ + β”œβ”€β”€ benchmarks/ + β”œβ”€β”€ notebooks/ + | β”œβ”€β”€ tutorials/ + | β”œβ”€β”€ use_cases/ + | β”œβ”€β”€ benchmarks/ + β”œβ”€β”€ .env_example + β”œβ”€β”€ .gitignore + β”œβ”€β”€ .pre-commit-config.yaml + β”œβ”€β”€ LICENSE.md + β”œβ”€β”€ README.md + β”œβ”€β”€ poetry.lock + β”œβ”€β”€ pyproject.toml + +1. The ``/adalflow`` directory contains the source code for the `AdalFlow` library,including its implementation, tests, and a dedicated `pyproject.toml` file. +2. The ``/docs`` directory houses the documentation for the `AdalFlow` library and also includes its own `pyproject.toml` file. + We use `reStructuredText` for the documentation. For more details, please refer to `README.md `_ under the ``./docs`` directory. +3. Additionally, it includes the following directories: + + - ``/tutorials``: Contains tutorials for the `AdalFlow` library, for each core feature or class. + - ``/use_cases``: Covers various use cases, likely end to end applications even with auto-optimization. + - ``/benchmarks``: Includes benchmarks to evaluate the library with other libraries or frameworks. +4. ``/notebooks`` directory contains all notebooks used across `tutorials`, `use_cases`, and `benchmarks`. + + + +Issue & Label System +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We use issues to manage bugs, features, and discussions. +To organize them, we’ve carefully designed **15** labels. + + +.. figure:: ../_static/images/adalflow_issues.png + :alt: AdalFlow Issues + :align: center + :width: 700px + + **Type**: The type of the issue, such as bug, feature, or discussion. + +We use three categories of labels in parallel: + +* Type of issue: There are 7 types of issues. + + - ``[adalflow]`` indicates the issue relates to `AdalFlow` source code in the ``/adalflow`` directory. + Within this directory, the ``/adalflow`` contains the source code, and the ``/tests`` contains test code. + 3 labels here include: ``integration``, ``improvement``, ``core feature``. + + - We use ``bug`` for code-related issues across all directories. + - ``documentation`` for items in the ``/docs``, ``/tutorials``, ``/use_cases``, ``/benchmarks``, and ``/notebooks`` directories. + - ``new use cases/benchmarks`` for issues in ``/use_cases`` and ``/benchmarks``. + - ``question`` for general inquiries. + +* How to proceed: There are 4 labels. + + - ``good first issue`` for tasks suitable for new contributors. + - ``wontfix`` for issues that won’t be addressed in the library. + - ``duplicate`` for issues already tracked elsewhere. + - ``help wanted`` indicates priority signal, discussion, and pr are needed. + +* Priority: There are 3 levels. + + - ``P0`` for the highest priority issues. + - ``P1`` for medium priority. + - ``P2`` for the lowest priority. + + +.. note:: + * All the above 14 labels can be used for both issues and PRs. + * ``ready-for-pr`` is exclusive to issues and indicates the issue is ready for a PR. + * ``ready-to-go`` is exclusive to PRs and indicates the PR is ready to be merged. + +This following table will provide a quick overview of them all. + + +.. 
list-table:: Label overview + :header-rows: 1 + :widths: 40 50 30 + + * - Type of issue (7 labels) + - How to proceed (3 labels) + - Priority (3 labels) + * - [adalflow] suggest integration + - + - + * - [adalflow] suggest improvement + - wontfix + - P0 + * - [adalflow] suggest core feature + - good first issue + - + * - new use cases/benchmarks + - duplicate (aggregate) and close one + - P1 + * - [adalflow] bug + - help wanted + - P2 + * - question + - ``ready-for-pr`` + - + * - documentation + - ``ready-to-go`` + - + +How to create an issue +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We’ve created five templates to make it easy for you to create an issue. + +.. figure:: ../_static/images/adalflow_issue_template.png + :alt: AdalFlow Issue Template + :align: center + :width: 700px + + Five templates for creating issues + +Each template automatically assigns relevant labels to the issue. + -* Check the `existing issues `_ and see if there is anyone you know how to fix or you'd love to fix. +How to assign priority +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* Join us on `Discord `_. We are glad to discuss with you and know what you are interested in here. +While our team marks priorities based on our best judgment, we also welcome community input to help us prioritize issues. -Figure out the scope of your change -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -**Small:** Most of the pull requests are small. If your change is small, such as fixing a line of bug, please go ahead to push it. +You can use πŸ‘ to indicate how important a particular issue is to you. +We’ll consider the ``# of πŸ‘ / time_period`` as an additional signal for setting priorities. -**Big:** But if you are making a new feature, or planning to push a large change, it is recommended to contact us on `Discord `_ first. -**Unknown:** If you have no idea how big it will be, we are here to help you. Please post your idea on `issues `_. We will read it carefully and get back to you. -Add your code +Part 2: What to contribute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Please check our `code contribution guidelines <./contribute_to_code.html>`_ to work with code. -Pull requests +This section provides more details on how each issue relates to our codebase. +We’ll include example PRs to help clarify these connections. + +The following table offers a quick overview. +More details on each type of contribution in Part 3. + +.. list-table:: What to Contribute (by 7 Labels) and Example PRs + :header-rows: 1 + :widths: 20 50 30 + + * - Label + - Contribution Suggestions + - Example Issue/PR + * - [adalflow] bug + - Fix bugs reported in issues, can relate to /adalflow code or /tutorials/user_cases/benchmarks. + - `Issue 134 `_ and `PR 135 `_ + * - [adalflow] suggest integration + - Add new integrations with model inference SDKs (:ref:`model_client`) or database retrievers (:ref:`retriever`) or tools or other libraries/frameworks. + - `Ollama integration request `_ and `PR 97 `_. This often involves tests, tutorial, and documentation. + * - [adalflow] suggest improvement + - Enhance existing features for better performance or usability, can relate to /adalflow code or /tutorials/user_cases/benchmarks. + - `Stream the response request `_ and `PR 158 `_. + * - [adalflow] suggest core feature + - Develop **new** core functionalities in `/adalflow` directory, such as `text_splitter` or `memory`. + - + * - new use cases/benchmarks + - Design benchmarks or propose new use cases for `adalflow`. 
+     -
+   * - documentation
+     - Improve existing documentation under `/docs` or `/notebooks` directories or create new documentation for existing code.
+     - `Issue 194 `_, `Issue 123 `_, and `PR 260 `_.
+   * - question
+     - Answer user queries or provide clarifications about the library.
+     -
+
+.. _part3-contributing-steps:
+
+Part 3: Contributing Steps
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Once you know what you want to contribute, follow these steps to ensure the quality of your contribution:
+
+1. **Track it.** Create an issue if it doesn't already exist.
+2. **Learn enough context.** Read the relevant documentation and code inside and outside of AdalFlow.
+   This includes:
+
+   - :ref:`tutorials`
+   - :ref:`use_cases`
+   - :ref:`API references`
+   - tests within `/adalflow/tests`
+
+   This will help you understand everything necessary for your contribution.
+   We'll provide examples for each type of contribution in the next section.
+
+   - For integration, you need to know the relevant SDKs and APIs.
+   - For documentation, you need to know the structure of the documentation and the writing style.
+
+3. **Create a solution proposal and gather input.** Write your solution proposal in the issue comments.
+   Alternatively, you can use a publicly accessible tool like ``Google Docs`` or ``Colab`` to share your proposal.
+   The `AdalFlow` team and the community will review and provide feedback before you start coding.
+   The team and core contributors can label it as ``ready-for-pr`` when it is ready for a PR.
+   This step is especially crucial for complex features. You can also discuss your proposal on our `Discord `_.
+
+4. **Work on it.** Follow the `PR & Review Process <#pr-review-process>`_ to begin coding, testing, documenting, and reviewing.
+
+
+.. TODO: edit the remaining content
+
+Integrate a model_client
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All existing :ref:`model clients` are located in the `/adalflow/adalflow/components/model_client` directory.
+The tutorial :ref:`model_client` will help you understand the base class and how it is designed.
+
+In general, the `OpenAI SDK `_ is trending toward becoming the industry standard,
+and you can gauge a target SDK by how much it differs from the OpenAI SDK.
+Overall, an integration such as the OpenAI one consists of three parts: coding, testing, and documentation.
+
+Coding includes:
+
+1. A client class such as :ref:`OpenAIClient` that inherits from the base class :ref:`ModelClient`.
+2. Add the `sdk package` as an optional package in the `adalflow/pyproject.toml` file.
+3. Add the `sdk package` as a lazy import in the `adalflow/adalflow/utils/lazy_import.py` file.
+4. Call the lazy import in the `adalflow/adalflow/components/model_client/__init__.py` file.
+5. Import the new client in the `adalflow/adalflow/__init__.py` file so that we can call it directly from the `adalflow` package.
+
+Testing includes:
+
+1. Create a test file `test_XX_client.py` in the `adalflow/tests/` directory. You can use `test_openai_client.py` as a reference.
+2. Add the package to the `adalflow/pyproject.toml` file under the `[tool.poetry.group.test.dependencies]` section.
+3. Add the test case for the new client in the test file.
+4. Follow the `adalflow/tests/README.md` to run the test.
+
+Documentation includes:
+
+1. Add examples on how to use the new client in the `tutorials` directory. You can use `tutorials/ollama_client_note.py` as a reference.
+2. Make sure you add the new client package in the root `pyproject.toml` file under the `[tool.poetry.dependencies]` section.
+3. Ensure the API reference is correctly rendered in the `docs` directory.
+   For example, with `ollama_client`, you need to add the following line in the `docs/source/apis/components/index.rst` file:
+
+   .. code-block:: text
+
+      components.model_client.ollama_client
+
+4. Add example API configurations in the root `.env_example` file.
+
+
+This `ollama_client PR `_ is a good example of how to integrate a new model client.
+
+Integrate a database retriever
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All existing :ref:`retrievers` are located in the `/adalflow/adalflow/components/retriever` directory.
+The tutorial :ref:`retriever` will help you understand the base class and how it is designed.
+
+The process is quite similar to integrating a model client. For documentation, ensure you add an example in :ref:`retriever`.
+
+This `qdrant_retriever PR `_ is a good example of how to integrate a new database retriever.
+
+Add notebooks for existing/new tutorials/use_cases/benchmarks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. figure:: ../_static/images/adalflow_code_samples.png
+   :alt: Code samples for tutorials
+   :align: center
+   :width: 700px
+
+   Code samples for tutorials
+
+
+.. note::
+   For how to add a new notebook, please follow the `README.md `_ in the `notebooks` directory.
+
+**Tutorials**
+
+For :ref:`tutorials` in our documentation, each tutorial is accompanied by two code files: one `XX_note.py` in `/tutorials` and one `adalflow_XX.ipynb` in `/notebooks/tutorials`.
+You can help add the missing code files in tutorials and make sure to link them in the documentation, as in the figure above.
+Here is one example issue and PR for adding a new tutorial: `Issue 192 `_ and `PR 261 `_.
+
+**Use Cases**
+
+For :ref:`use_cases` in our documentation, each use case is accompanied by source code in `/use_cases` and a notebook in `/notebooks/use_cases`.
+For our existing use cases, we do not always have the corresponding notebooks. You can help add the missing notebooks for the existing use cases.
+
+**Benchmarks**
+
+So far, we are still working on the code in the `/benchmarks` directory. We will need help adding the documentation along with the code.
+
+Part 4: Pull Request Process
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Prepare the codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Before you can start a pull request, you need to follow these steps and this `GitHub official fork guide `_:
+
+1. **Fork the repository.**
+   You can fork the repository by clicking the `Fork` button on the top right corner of the repository page.
+
+2. **Clone the repository.**
+   You can clone the repository by running the following command in your terminal:
+
+   .. code-block:: bash
+
+      git clone your_forked_repository_url
+
+3. **Sync your fork.**
+   Also, make sure your repository is in sync with the original owner's repository.
You can do this by running the following commands: + + .. code-block:: bash + + git remote -v + + You will not see our repo in the list. You can add it by running the following command: + + .. code-block:: bash + + git remote add upstream https://github.com/SylphAI-Inc/AdalFlow.git + + Now, when you run `git remote -v`, you will see the upstream repo. + Then, we can sync your fork with the upstream repo by running the following commands: + + .. code-block:: bash + + git fetch upstream + git checkout main + git merge upstream/main + + *Note: `fetch` will fetch the changes from the upstream repo, but it will not merge them into your local branch. `merge` will merge the changes from the upstream repo into your local branch.* + For more detials, please refer to the `Github official syncing a fork guide `_. + +4. **Create a new branch.** + Create a new branch to ensure your changes are isolated from the main codebase. You can do this by running the following command: + + .. code-block:: bash + + git checkout -b _ + + Similarly, you always use step 3 to sync your branch with the upstream repo. + Additionally, you can use the following commands to sync: + + .. code-block:: bash + + git fetch --all --prune + git rebase upstream/main + # follow git instructions to resolve conflicts + git push origin your_branch_name + + +Work on your PR +~~~~~~~~~~~~~~~~~~~~ + +1. **Set up the pre-commit hooks.** + We have a `.pre-commit-config.yaml` file in the root directory. + Ensure you have set up the pre-commit hooks. We recommend you to do so in the `poetry` environment. + The following steps will help you set up the root poetry environment and the pre-commit hooks: + Install `poetry` if you haven't already: + + .. code-block:: bash + + pip install poetry + + You can install the dependencies by running the following command: + + .. code-block:: bash + + poetry install + + Then you can activate the environment by running the following command: + + .. code-block:: bash + + poetry shell + + Then, install the pre-commit hooks by running the following command: + + .. code-block:: bash + + pre-commit install + + *Now, you can start to commit your changes from the `/adalflow` directory next time even if you are not in the poetry environment.* + If you have more questions, you can refer to the `pre-commit official guide `_. + +2. **Commit your changes.** + Once you have made your changes, you can commit them by running the following commands: + + .. code-block:: bash + + git add . + git commit -m "Your commit message" + git push origin your_branch_name + + If you face "permission denied" issue, you can refer to this `medium blog `_ for help. + +3. **Create a Pull Request.** + Go to your forked repository on Github and click the `New Pull Request` button. Make sure you select the correct branch for the base and compare branches. + Here we have a default `PR template `_ for you to fill in. + +4. **Fill in the PR template.** + Make sure you fill in the PR template with the necessary information. This will help the reviewers understand your changes better. + +5. **Submit the PR** + We encourage you to submit the PR as soon as possible, even if it is not ready for review. You can mark it as a draft by: + 1. Clicking the `Draft` button on the PR page. + 2. Adding `[WIP]` to the PR title. + + .. figure:: ../_static/images/pr_draft.png + :alt: Create a draft PR + :align: center + :width: 700px + + Create a draft PR + +6. 
**Iterate your PR.** + Once you have submitted the PR, the reviewers will review your changes and provide feedback. You can iterate your PR by making the necessary changes and pushing them to your branch. The reviewers will review your changes again. + +7. **Merge your PR.** + Once your PR is approved, the reviewers will merge your PR for you. You can also merge your PR by clicking the `Merge` button on the PR page. + + + +Part 5: Review Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +For now, we will use the `PyTorch lightning's review guideline `_. + +.. Environment +.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. When contributing, please note: +.. LightRAG separates the source code environment and documentation environment. + +.. * To activate the code environment, you should run ``poetry install`` and ``poetry shell`` under ``./lightrag``. The ``./lightrag/pyproject.toml`` contains the dependencies for the ``LightRAG`` package. + +.. * To activate the documentation environment, you can run ``poetry install`` and ``poetry shell`` under ``.``. The ``./pyproject.toml`` controls documentation dependencies. -Writing Documentation ----------------------------- -It is a good practice to submit your code with documentations to help the ``LightRAG`` team and other developers better understand your updates. -Please see our `documentation contribution guidelines <./contribute_to_document.html>`_ for more details on ``LightRAG`` documentation standard. -.. admonition:: Resources - :class: highlight - \ No newline at end of file +.. .. admonition:: Resources +.. :class: highlight diff --git a/docs/source/contributor/index.rst b/docs/source/contributor/index.rst index 676f65d9..7e2c2da0 100644 --- a/docs/source/contributor/index.rst +++ b/docs/source/contributor/index.rst @@ -1,11 +1,34 @@ -For Contributors +Contributor Guide ============================= +Welcome to the AdalFlow community! We're building the most user-friendly, modular, and powerful library for building and auto-optimizing LLM applications, from Chatbots and RAGs to Agents. +*Think of AdalFlow for LLM applications and prompt engineering as the PyTorch/TensorFlow/JAX equivalent for AI modeling.* + +The goal of the library is to provide basic and fundamental building blocks to create advanced applications with auto-optimization out of the box. +As we mature, we anticipate that more RAG, memory-based chatbots, or agent frameworks will be built on top of AdalFlow’s building blocks, such as `retriever` and `generator`. +We highly suggest you read our :ref:`design principle` before you start contributing. + +We greatly appreciate all contributions, from bug fixes to new features, and value every contributor. +However, we must be selective to ensure our library remains reliable for users. +We hope your contributions go beyond listing your name on the repoβ€”our goal is for you to learn, grow your skills, support your favorite projects, and give back to the community! + +The goal of this guide is to design the best process for maintaining the quality of our library while enabling the community to make meaningful contributions. +It took us three months to set up this contributor guide, as we first tested the process with early contributors. +*We are determined to make AdalFlow as great and legendary as PyTorch.* + +This guide covers the overall contributing process, along with development essentials for environment setup, coding, testing, and documentation. + +Here’s to the future of LLM applications! + +By `Li Yin `_. + +.. 
TODO: add a relation to the whole ecosystem + .. toctree:: - :maxdepth: 2 + :caption: Contributor Guide + :maxdepth: 1 contribution contribute_to_code - contribute_to_document - .. version_control \ No newline at end of file + .. version_control diff --git a/docs/source/contributor/version_control.rst b/docs/source/contributor/version_control.rst index 456c7528..30c2480e 100644 --- a/docs/source/contributor/version_control.rst +++ b/docs/source/contributor/version_control.rst @@ -7,7 +7,7 @@ Overview -------- **The version will mainly be managed by the LightRAG team. But we are glad to share how we will release the latest version here.** -This guide outlines the process for releasing a new version of ``LightRAG``. +This guide outlines the process for releasing a new version of ``LightRAG``. The workflow pipeline validates the version tag, builds the package, runs tests, publishes to PyPI, and creates a release on GitHub. The workflow is triggered by tags pushed to the **Release** branch. See `GitHub tags `_ for more details on version release tagging. Steps to Release a New Version @@ -18,7 +18,7 @@ Steps to Release a New Version [tool.poetry] name = "lightrag" - + version = "0.0.0-rc.1" description = "The 'PyTorch' library for LLM applications. RAG=Retriever-Agent-Generator." @@ -49,7 +49,7 @@ Steps to Release a New Version git add lightrag/pyproject.toml git commit -m "new version release" git push origin release - + Since the workflow only processes **tags**, your file submission will not go through the version release workflow. Only the tags you pushed will get checked. @@ -66,7 +66,7 @@ Steps to Release a New Version .. code-block:: python git tags # list the existing tags - + git tag -d git push origin --delete diff --git a/docs/source/get_started/adalflow_in_15mins.rst b/docs/source/get_started/adalflow_in_15mins.rst index 419f4f71..cf372ff1 100644 --- a/docs/source/get_started/adalflow_in_15mins.rst +++ b/docs/source/get_started/adalflow_in_15mins.rst @@ -379,7 +379,7 @@ Here’s the minimum code required to get started on evaluating the task pipelin self, sample: Example, y_pred: adal.GeneratorOutput ) -> float: y_label = -1 - if y_pred and y_pred.data: + if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0 y_label = y_pred.data return self.eval_fn(y=y_label, y_gt=sample.answer) diff --git a/docs/source/index.rst b/docs/source/index.rst index 0ccb2469..5f265357 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -318,14 +318,6 @@ We are building a library that unites the two worlds, forming a healthy LLM appl use_cases/index -.. toctree:: - :glob: - :maxdepth: 1 - :hidden: - - apis/index - - .. :caption: Benchmarks .. Manually add documents for the code in benchmarks @@ -339,9 +331,19 @@ We are building a library that unites the two worlds, forming a healthy LLM appl .. hide the for contributors now - .. :glob: - .. :maxdepth: 1 - .. :caption: For Contributors - .. :hidden: +.. toctree:: + :glob: + :maxdepth: 1 + :caption: For Contributors + :hidden: + + contributor/index - .. contributor/index + + +.. toctree:: + :glob: + :maxdepth: 1 + :hidden: + + apis/index diff --git a/docs/source/integrations/index.rst b/docs/source/integrations/index.rst new file mode 100644 index 00000000..313c2f1e --- /dev/null +++ b/docs/source/integrations/index.rst @@ -0,0 +1 @@ +The integration will be listing all existing integrations and integrations we plan to add. 
diff --git a/docs/source/resources/resources.rst b/docs/source/resources/resources.rst index 08f77d35..4affa68e 100644 --- a/docs/source/resources/resources.rst +++ b/docs/source/resources/resources.rst @@ -3,7 +3,3 @@ Resources Please check the GitHub for more information: `GitHub repository `_
-
-
-
-
diff --git a/docs/source/tutorials/base_data_class.rst b/docs/source/tutorials/base_data_class.rst index da78f58e..578782f2 100644 --- a/docs/source/tutorials/base_data_class.rst +++ b/docs/source/tutorials/base_data_class.rst @@ -1,4 +1,18 @@ .. _core-base_data_class_note:
+
+
+.. raw:: html
+
+
 DataClass ============ @@ -7,10 +21,10 @@ DataClass .. `Li Yin `_
-In `PyTorch`, ``Tensor`` is the data type used in ``Module`` and ``Optimizer`` across the library.
-Tensor wraps a multi-dimensional matrix to better support its operations and computations.
+
 In LLM applications, data constantly needs to interact with LLMs in the form of strings via prompt and be parsed back to structured data from LLMs' text prediction.
 :class:`DataClass` is designed to ease this data interaction with LLMs via prompt(input) and to parse the text prediction(output).
+It is even more convenient to use together with :doc:`Parser` to parse the output from LLMs.
 .. figure:: /_static/images/dataclass.png :align: center
@@ -61,11 +75,13 @@ Here is how users typically use the ``dataclasses`` module: We also made the effort to provide more control: 1. **Keep the ordering of your data fields.** We provided :func:`required_field` with ``default_factory`` to mark the field as required even if it is after optional fields. We also has to do customization to preserve their ordering while being converted to dictionary, json and yaml string.
-2. **Exclude some fields from the output.** All serialization methods support `exclude` parameter to exclude some fields even for nested dataclasses.
-3. **Allow nested dataclasses, lists, and dictionaries.** All methods support nested dataclasses, lists, and dictionaries.
+2. **Signal the output/input fields.** We allow you to use ``__output_fields__`` and ``__input_fields__`` to explicitly signal the output and input fields. (1) They can be a subset of the fields in the data class. (2) You can specify the ordering in ``__output_fields__``.
+3. **Exclude some fields from the output.** All serialization methods support `exclude` parameter to exclude some fields even for nested dataclasses.
+4. **Allow nested dataclasses, lists, and dictionaries.** All methods support nested dataclasses, lists, and dictionaries.
+5. **Easy to use with Output parser.** It works well with output parsers such as ``JsonOutputParser``, ``YamlOutputParser``, and ``DataClassParser``. You can refer to :doc:`Parser` for more details.
-Describing the Data Format
+Describing the Data Format (Data Class)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. list-table::
@@ -74,6 +90,10 @@ Describing the Data Format * - **Name** - **Description**
+   * - ``__input_fields__``
+     - A list of fields that are input fields.
+   * - ``__output_fields__``
+     - Used more often than ``__input_fields__``. A list of fields that are output fields. (1) They can be a subset of the fields in the data class. (2) You can specify the ordering in ``__output_fields__``. (3) It works only with :class:`DataClassParser`.
    * - ``to_schema(cls, exclude) -> Dict``
      - Generate a JSON schema which is more detailed than the signature.
* - ``to_schema_str(cls, exclude) -> str`` @@ -227,7 +247,7 @@ As you can see, it handles the nested dataclass `Question` and the required fiel .. note::
-    ``Optional`` type hint will not affect the field's required status. You can use this to work with static type checkers such as `mypy` if you want to.
+    ``Optional`` type hint will not affect the field's required status. We recommend not using it in the `dataclasses` module, especially when you are nesting many levels of dataclasses, as it might end up confusing the LLMs.
**Signature** @@ -600,7 +620,10 @@ You can simply do a bit customization to map the dataset's key to the field name If you are looking for data types we used to support each component or any other class like `Optimizer`, you can check out the :ref:`core.types` file.
-
+About __output_fields__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Though you can use `exclude` in the :class:`JsonOutputParser` to exclude some fields from the output, it is less readable and less convenient than
+directly using `__output_fields__` in the data class to signal the output fields and working directly with :class:`DataClassParser`.
.. admonition:: References :class: highlight @@ -616,7 +639,9 @@ You can simply do a bit customization to map the dataset's key to the field name - :class:`core.base_data_class.DataClassFormatType` - :func:`core.functional.custom_asdict` - :ref:`core.base_data_class`
-
+    - :class:`core.base_data_class.required_field`
+    - :class:`components.output_parsers.outputs.JsonOutputParser`
+    - :class:`components.output_parsers.dataclass_parser.DataClassParser`
.. Document .. ------------ diff --git a/docs/source/tutorials/component.rst b/docs/source/tutorials/component.rst index a9e4ae37..649f5310 100644 --- a/docs/source/tutorials/component.rst +++ b/docs/source/tutorials/component.rst @@ -1,7 +1,7 @@ .. raw:: html
- + Try Quickstart in Colab diff --git a/docs/source/tutorials/lightrag_design_philosophy.rst b/docs/source/tutorials/lightrag_design_philosophy.rst index 83bb3272..b215b0b1 100644 --- a/docs/source/tutorials/lightrag_design_philosophy.rst +++ b/docs/source/tutorials/lightrag_design_philosophy.rst @@ -3,12 +3,12 @@ Design Philosophy ====================================
-Right from the begining, `LightRAG` follows three fundamental principles.
+Right from the beginning, `AdalFlow` follows three fundamental principles.
Principle 1: Simplicity over Complexity -----------------------------------------------------------------------
- We put these three hard rules while designing LightRAG:
+ We put these three hard rules in place while designing AdalFlow:
 - Every layer of abstraction needs to be adjusted and overall we do not allow more than 3 layers of abstraction.
 - We minimize the lines of code instead of maximizing the lines of code.
diff --git a/docs/source/tutorials/model_client.rst b/docs/source/tutorials/model_client.rst index 943cc574..438d34d3 100644 --- a/docs/source/tutorials/model_client.rst +++ b/docs/source/tutorials/model_client.rst @@ -1,3 +1,15 @@
+.. raw:: html
+
+
 .. _tutorials-model_client:
ModelClient @@ -268,6 +280,1238 @@ The output will be: .. TODO: add optional package introduction here
+OPENAI EMBEDDER - Embedding Processing Example
+-------------------------------------------------
+
+In this example, we use a collection of embeddings to demonstrate different functionalities, such as calculating semantic similarity, finding nearest neighbors, and averaging embeddings. Below is the Python code used to achieve these tasks:
+
+.. code-block:: python
+
+    from typing import List
+    import numpy as np
+    from adalflow.core.types import ModelType, EmbedderOutput
+    from adalflow.components.model_client import OpenAIClient
+    from dataclasses import dataclass
+    from numpy.linalg import norm
+
+Data Classes
+
+We use two dataclass types to structure the collection and usage data:
+
+- ``EmbeddingCollection``: stores an individual embedding collection and its corresponding index.
+- ``Usage``: keeps track of token usage, such as ``prompt_tokens`` and ``total_tokens``.
+
+.. code-block:: python
+
+    @dataclass
+    class EmbeddingCollection:
+        collection: List[float]
+        cindex: int
+
+
+    @dataclass
+    class Usage:
+        prompt_tokens: int
+        total_tokens: int
+
+The following function, `get_openai_embedding`, sends a request to the OpenAI API to retrieve embeddings for a given text. It sets the model type to `EMBEDDER`, prepares the required model-specific parameters, and processes the response:
+
+.. 
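note::
+
+   The snippets below assume a valid OpenAI key is available to the client. A minimal setup sketch (assuming the key lives in a local ``.env`` file under the standard name ``OPENAI_API_KEY``):
+
+   .. code-block:: python
+
+      from adalflow.utils import setup_env
+
+      # Loads the variables in .env (including OPENAI_API_KEY) into the process environment
+      setup_env()
+
+.. 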
code-block:: python
+
+    openai_client = OpenAIClient()
+
+    def get_openai_embedding(text):
+        # Set model type to EMBEDDER for embedding functionality
+        model_type = ModelType.EMBEDDER
+
+        # Prepare input and model-specific parameters
+        input = text
+        model_kwargs = {
+            "model": "text-embedding-3-small",
+            "dimensions": 8,
+            "encoding_format": "float",
+        }
+
+        # Convert inputs to the required API format
+        api_kwargs = openai_client.convert_inputs_to_api_kwargs(
+            input=input, model_kwargs=model_kwargs, model_type=model_type
+        )
+        print(f"api_kwargs: {api_kwargs}")  # Debug output to verify API arguments
+
+        # Call OpenAI API and parse response for embeddings
+        response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)
+        response_embedder_output = openai_client.parse_embedding_response(response)
+        print(
+            f"response_embedder_output: {response_embedder_output}"
+        )  # Debug output to verify embeddings
+        return response_embedder_output
+
+Embedding Processing
+
+The function process_embeddings takes in a collection of embeddings and provides utilities for calculating similarity, averaging embeddings, and finding nearest neighbors:
+
+- Similarity: measures the cosine similarity between two embeddings.
+- Average Embedding: computes the mean embedding across a set of embeddings.
+- Nearest Neighbors: identifies the top-k nearest neighbors based on cosine similarity.
+
+.. code-block:: python
+
+    def process_embeddings(embeddings_collection):
+        # Extract embedding data for each item in the collection
+        embeddingOutput = [emb.collection for emb in embeddings_collection]
+        embeddingDataList = [each_emb_out.data for each_emb_out in embeddingOutput]
+        embeddingList = [
+            each_item.embedding
+            for each_emb_data in embeddingDataList
+            for each_item in each_emb_data
+        ]
+
+        # Convert to numpy array for easier manipulation and calculations
+        embeddings_array = np.array(embeddingList)
+
+        def calculate_similarity(emb1, emb2):
+            # Compute cosine similarity between two embeddings
+            return np.dot(emb1, emb2) / (norm(emb1) * norm(emb2))
+
+        def get_average_embedding(embeddings_list):
+            # Calculate the mean embedding across a list of embeddings
+            return np.mean(embeddings_list, axis=0)
+
+        def find_nearest_neighbors(
+            query_index: int, embedding_list: List[List[float]], k: int = 5
+        ):
+            # Find top-k most similar embeddings to a query embedding, based on cosine similarity
+            query_embedding = embedding_list[query_index]
+            similarities = [
+                (i, calculate_similarity(query_embedding, emb))
+                for i, emb in enumerate(embedding_list)
+                if i != query_index
+            ]
+            return sorted(similarities, key=lambda x: x[1], reverse=True)[:k]
+
+        # Return dictionary of functions and processed data for further use
+        return {
+            "embeddings_array": embeddings_array,
+            "calculate_similarity": calculate_similarity,
+            "average_embedding": get_average_embedding,
+            "find_nearest_neighbors": find_nearest_neighbors,
+        }
+
+The function `demonstrate_embeddings_usage` showcases how to analyze semantic similarities, find nearest neighbors, and calculate average embeddings for sample texts. It selects random texts, compares their similarities, finds nearest neighbors for a specific query, and compares average embeddings for texts containing "Paris".
+
+.. 
code-block:: python + + # Demonstrate embeddings usage with sample data + def demonstrate_embeddings_usage(sample_embeddings, input_text_list): + # Initialize processor and retrieve embeddings array + processor = process_embeddings(sample_embeddings) + embeddings = processor["embeddings_array"] + + print("1. Analyzing Semantic Similarities:") + print("-" * 50) + + # Select a few random indices for similarity testing + num_indices = 5 + assert len(input_text_list) == len(embeddings) + indices = np.random.choice(len(input_text_list), num_indices, replace=False) + selected_text = np.array(input_text_list)[indices] + selected_embeddings = np.array(embeddings)[indices] + + # Display selected texts and their embeddings + print("Selected indices:", indices) + print("Selected elements from array1:", selected_text) + print("Selected elements from array2:", selected_embeddings) + + # Calculate similarity between each pair of selected texts + for i in range(len(selected_text)): + for j in range(i + 1, len(selected_text)): + similarity = processor["calculate_similarity"]( + selected_embeddings[i], selected_embeddings[j] + ) + print(f"\nComparing:\n'{selected_text[i]}' \nwith:\n'{selected_text[j]}'") + print(f"Similarity score: {similarity:.4f}") + + print("\n2. Finding Nearest Neighbors:") + print("-" * 50) + + # Find and display the 3 nearest neighbors for the first text + query_idx = 0 + neighbors = processor["find_nearest_neighbors"](query_idx, embeddings, k=3) + print(f"\nQuery text: '{input_text_list[query_idx]}'") + print("\nNearest neighbors:") + + for idx, similarity in neighbors: + print(f"- '{input_text_list[idx]}' (similarity: {similarity:.4f})") + + print("\n3. Using Average Embeddings:") + print("-" * 50) + + # Calculate and compare the average embedding for texts containing "Paris" + paris_indices = [i for i, text in enumerate(input_text_list) if "Paris" in text] + paris_embeddings = embeddings[paris_indices] + avg_paris_embedding = processor["average_embedding"](paris_embeddings) + + print("\nComparing average 'Paris' embedding with all texts:") + for i, text in enumerate(input_text_list): + similarity = processor["calculate_similarity"]( + avg_paris_embedding, embeddings[i] + ) + print(f"- '{text}' (similarity: {similarity:.4f})") + + +Running the Model Client + +Finally, we run the model client by initializing a set of sample texts, generating their embeddings, and using the embedding processing functions to analyze similarities and neighbors. + +.. code-block:: python + + def run_model_client_embedding_usage(): + # Define a set of sample texts to test embedding and similarity functionalities + sample_texts = [ + "What is the capital of France?", + "Paris is the capital of France.", + "What is the population of France?", + "How big is Paris?", + "What is the weather like in Paris?", + ] + + # Duplicate each sample text to form an input list with repeated entries (for embedding testing) + input_text_list = [text for text in sample_texts for _ in range(2)] + + # Generate embeddings for each text in the input list, and store them in an EmbeddingCollection + embeddings_collection = [ + EmbeddingCollection(collection=get_openai_embedding(text), cindex=i) + for i, text in enumerate(input_text_list) + ] + print( + embeddings_collection + ) # Debugging output to verify embeddings collection content + + # Demonstrate the usage of embeddings by analyzing similarities, finding neighbors, etc. 
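+        # Note: every text appears twice in input_text_list (each sample is
+        # duplicated above), so each query's nearest neighbor should be its
+        # own duplicate, with a cosine similarity close to 1.0.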
+ demonstrate_embeddings_usage(embeddings_collection, input_text_list) + +To execute the complete example, simply call the `run_model_client_embedding_usage()` function: + +.. code-block:: python + + run_model_client_embedding_usage() + + +This will trigger the embedding retrieval and processing functions, and you will see the results printed out, demonstrating how embeddings can be used for similarity analysis, neighbor finding, and averaging. + +OPENAI LLM Chat - Multichat Usage +------------------------------------------------- +This example demonstrates how to create a multichat system using OpenAI's LLM with adalflow, where the assistant's responses depend on the entire conversation history. This allows for a more dynamic and context-aware conversation flow. + +.. code-block:: python + + from adalflow.components.model_client import OpenAIClient + from adalflow.core.types import ModelType + from adalflow.utils import setup_env + from typing import List, Dict + +ChatConversation Class + +Here, we define a ``ChatConversation`` class to manage the conversation history and make API calls to the OpenAI model. The assistant's responses are generated based on the entire conversation history. + +.. code-block:: python + + class ChatConversation: + def __init__(self): + # Initialize the OpenAI client for managing API calls + self.openai_client = OpenAIClient() + # Initialize an empty conversation history to store chat messages + self.conversation_history: str = "" + # Model parameters to customize the API call + self.model_kwargs = { + "model": "gpt-3.5-turbo", + "temperature": 0.5, # Controls randomness; 0.5 for balanced responses + "max_tokens": 100, # Limits the response length + } + + def add_user_message(self, message: str): + """Add a user message to the conversation history""" + self.conversation_history += ( + f" {message} " # Format for user message + ) + + def add_assistant_message(self, message: str): + """Add an assistant message to the conversation history""" + self.conversation_history += ( + f" {message} " # Format for assistant message + ) + + def get_response(self) -> str: + """Get response from the model based on conversation history""" + # Convert the conversation history and model parameters into API arguments + api_kwargs = self.openai_client.convert_inputs_to_api_kwargs( + input=self.conversation_history, + model_kwargs=self.model_kwargs, + model_type=ModelType.LLM, + ) + print(f"api_kwargs: {api_kwargs}") # Debugging output to verify API parameters + + # Call the API with the generated arguments to get a response + response = self.openai_client.call( + api_kwargs=api_kwargs, model_type=ModelType.LLM + ) + print("response: ", response) # Debugging output for raw API response + + # Extract and parse the text response from the API output + response_text = self.openai_client.parse_chat_completion(response) + # Update conversation history with the assistant's response + self.add_assistant_message(response_text) + return response_text # Return the assistant's response to the caller + +Simulating a Multi-turn Conversation + +In the ``check_chat_conversation()`` function, we simulate a multi-turn conversation by iterating over a list of user questions. Each question is added to the conversation history, and the assistant responds based on the accumulated conversation context. + +.. 
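note::
+
+   Because the full ``conversation_history`` is resent on every call, token usage grows with each turn. A rough guard you could add (an illustrative sketch, not part of the example; the budget below is arbitrary):
+
+   .. code-block:: python
+
+      MAX_HISTORY_CHARS = 4000  # arbitrary budget, for illustration only
+
+      def trim_history(history: str) -> str:
+          # Keep only the most recent part of the conversation
+          return history[-MAX_HISTORY_CHARS:]
+
+.. 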
code-block:: python
+
+    def check_chat_conversation():
+        # Initialize a new chat conversation
+        chat = ChatConversation()
+
+        # Example list of user questions to simulate a multi-turn conversation
+        questions = [
+            "What is the capital of France?",
+            "What is its population?",
+            "Tell me about its famous landmarks",
+        ]
+
+        # Iterate through each question in the list
+        for question in questions:
+            print(f"\nUser: {question}")  # Display the user's question
+            chat.add_user_message(
+                question
+            )  # Add the user question to the conversation history
+
+            response = (
+                chat.get_response()
+            )  # Get assistant's response based on conversation history
+            print(f"Assistant: {response}")  # Display the assistant's response
+
+        # Display the full conversation history after all exchanges
+        print("\nFull Conversation History:")
+        print(chat.conversation_history)  # Print the accumulated conversation history
+
+Key Points
+
+Each question depends on the previous ones: because the whole conversation history is resent on every call, the assistant answers in context. Call ``check_chat_conversation()`` to run the demo.
+
+OPENAI LLM Chat - Multichat Usage - Asynchronous
+-------------------------------------------------
+
+This example demonstrates how to create an asynchronous multichat system using OpenAI's LLM with adalflow. The asynchronous approach allows handling multiple questions in parallel, making the interaction more efficient when dealing with unrelated queries.
+
+.. code-block:: python
+
+    import asyncio
+    from adalflow.components.model_client import OpenAIClient
+    from adalflow.core.types import ModelType
+    from typing import List
+
+ChatConversationAsync Class
+
+The ``ChatConversationAsync`` class is designed to handle asynchronous API calls to the OpenAI model. It supports concurrent requests, which improves performance when interacting with multiple questions simultaneously.
+
+.. code-block:: python
+
+    class ChatConversationAsync:
+        def __init__(self):
+            # Initialize with an asynchronous OpenAI client
+            self.openai_client = OpenAIClient()
+
+            # Default model parameters for the chat
+            self.model_kwargs = {
+                "model": "gpt-3.5-turbo",  # Model used for chat
+                "temperature": 0.5,  # Controls randomness in response
+                "max_tokens": 100,  # Maximum tokens in the generated response
+            }
+
+        async def get_response(self, message: str) -> str:
+            """Asynchronously get a response from the model for a given user message"""
+
+            # Convert input message and model parameters into the format expected by the API
+            api_kwargs = self.openai_client.convert_inputs_to_api_kwargs(
+                input=message,  # User's message input
+                model_kwargs=self.model_kwargs,  # Model-specific settings
+                model_type=ModelType.LLM,  # Specify the model type as a language model (LLM)
+            )
+            print(f"api_kwargs: {api_kwargs}")  # Log the API arguments for debugging
+
+            # Make an asynchronous API call to OpenAI's model
+            response = await self.openai_client.acall(
+                api_kwargs=api_kwargs,  # Pass the prepared arguments
+                model_type=ModelType.LLM,  # Specify the model type again
+            )
+            print("response: ", response)  # Print the raw response from the API
+
+            # Parse the API response to extract the assistant's reply (chat completion)
+            response_text = self.openai_client.parse_chat_completion(response)
+            return response_text  # Return the parsed response text
+
+Running Multiple Asynchronous Chat Sessions
+
+In the ``check_chat_conversations_async()`` function, we handle a list of unrelated user questions concurrently. This is done by creating a list of asynchronous tasks and gathering their responses.
+
+.. 
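note::
+
+   Many concurrent requests can run into provider rate limits. One way to cap the number of in-flight calls (a sketch using only the standard library):
+
+   .. code-block:: python
+
+      import asyncio
+
+      semaphore = asyncio.Semaphore(3)  # allow at most 3 requests in flight
+
+      async def bounded_response(chat, question: str) -> str:
+          async with semaphore:
+              return await chat.get_response(question)
+
+.. 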
code-block:: python + + async def check_chat_conversations_async(): + # Create an instance of ChatConversationAsync to handle asynchronous operations + chat = ChatConversationAsync() + + # List of unrelated questions that will be handled in parallel + questions = [ + "What is the capital of France?", # Question 1 + "Is dog a wild animal?", # Question 2 + "Tell me about amazon forest", # Question 3 + ] + + # Create a list of asynchronous tasks, one for each question + # Each task calls the get_response method asynchronously for a question + tasks = [chat.get_response(question) for question in questions] + + # Gather the results of all asynchronous tasks concurrently + responses = await asyncio.gather(*tasks) + + # Print the responses from the assistant along with the respective user questions + for question, response in zip(questions, responses): + print(f"\nUser: {question}") + print(f"Assistant: {response}") + +Running the Asynchronous Function + +To execute the asynchronous function, you can use the following methods based on your environment: + +.. code-block:: python + + # Run the asynchronous function if in a file + # asyncio.run(check_chat_conversations_async()) + + # in jupyter notebook + await check_chat_conversations_async() + +This approach allows you to handle multiple independent conversations concurrently, improving the system's performance and responsiveness. + +OPENAI LLM Chat - Multichat Usage - Benchmark sync() vs async() +--------------------------------------------------------------------- + +This section compares the performance of synchronous (``call()``) vs. asynchronous (``acall()``) API calls to OpenAI's language model, benchmarking them using a sample prompt to determine which approach is more efficient for handling multiple API requests. + +.. code-block:: python + + import asyncio + import time + from adalflow.components.model_client import ( + OpenAIClient, + ) # Assuming OpenAIClient with .call() and .acall() is available + from adalflow.core.types import ModelType + +Setup for Benchmarking + +We initialize the OpenAI client and set up a sample prompt to test both synchronous and asynchronous API calls. + +.. code-block:: python + + # Initialize the OpenAI client + openai_client = OpenAIClient() + + # Sample prompt for testing + prompt = "Tell me a joke." + + model_kwargs = {"model": "gpt-3.5-turbo", "temperature": 0.5, "max_tokens": 100} + +Synchronous Benchmarking + +The ``benchmark_sync_call`` function runs the synchronous ``.call()`` method multiple times and measures the total time taken for all requests. + +.. code-block:: python + + # Synchronous function for benchmarking .call() + def benchmark_sync_call(api_kwargs, runs=10): + """ + Benchmark the synchronous .call() method by running it multiple times. 
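+    Calls run back-to-back, so the total wall-clock time grows roughly
+    linearly with ``runs`` (contrast this with the asynchronous version
+    below, where the calls overlap).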
+ + Parameters: + - api_kwargs: The arguments to be passed to the API call + - runs: The number of times to run the call (default is 10) + """ + # List to store responses + responses = [] + + # Record the start time of the benchmark + start_time = time.time() + + # Perform synchronous API calls for the specified number of runs + responses = [ + openai_client.call( + api_kwargs=api_kwargs, # API arguments + model_type=ModelType.LLM, # Model type (e.g., LLM for language models) + ) + for _ in range(runs) # Repeat 'runs' times + ] + + # Record the end time after all calls are completed + end_time = time.time() + + # Output the results of each synchronous call + for i, response in enumerate(responses): + print(f"sync call {i + 1} completed: {response}") + + # Print the total time taken for all synchronous calls + print(f"\nSynchronous benchmark completed in {end_time - start_time:.2f} seconds") + + + # Asynchronous function for benchmarking .acall() + async def benchmark_async_acall(api_kwargs, runs=10): + """ + Benchmark the asynchronous .acall() method by running it multiple times concurrently. + + Parameters: + - api_kwargs: The arguments to be passed to the API call + - runs: The number of times to run the asynchronous call (default is 10) + """ + # Record the start time of the benchmark + start_time = time.time() + + # Create a list of asynchronous tasks for the specified number of runs + tasks = [ + openai_client.acall( + api_kwargs=api_kwargs, # API arguments + model_type=ModelType.LLM, # Model type (e.g., LLM for language models) + ) + for _ in range(runs) # Repeat 'runs' times + ] + + # Execute all tasks concurrently and wait for them to finish + responses = await asyncio.gather(*tasks) + + # Record the end time after all tasks are completed + end_time = time.time() + + # Output the results of each asynchronous call + for i, response in enumerate(responses): + print(f"Async call {i + 1} completed: {response}") + + # Print the total time taken for all asynchronous calls + print(f"\nAsynchronous benchmark completed in {end_time - start_time:.2f} seconds") + +.. code-block:: python + + api_kwargs = openai_client.convert_inputs_to_api_kwargs( + input=prompt, model_kwargs=model_kwargs, model_type=ModelType.LLM + ) + + # Run both benchmarks + print("Starting synchronous benchmark...\n") + benchmark_sync_call(api_kwargs) + + # Run the asynchronous function if in a file + # asyncio.run(benchmark_async_acall(api_kwargs)) + + print("\nStarting asynchronous benchmark...\n") + await benchmark_async_acall(api_kwargs) + +OPENAI LLM Chat - Additional Utils +------------------------------------------------- + +This section demonstrates the use of additional utility functions for OpenAI's language model client. The following utility functions are included: + +- ``get_first_message_content()`` +- ``get_all_messages_content()`` +- ``get_probabilities()`` + +These utilities can be used to interact with the OpenAI model in various ways, such as extracting the first message content, retrieving all message content from a multi-chat scenario, and calculating the probabilities of tokens. + +Code Setup + +First, we import necessary components for utilizing the OpenAI client and the utilities from the ``adalflow`` library. + +.. 
code-block:: python + + from adalflow.components.model_client import OpenAIClient + from adalflow.core.types import ModelType + from adalflow.utils import setup_env + from adalflow.components.model_client.openai_client import ( + get_first_message_content, + get_all_messages_content, + get_probabilities, + ) + from adalflow.core import Generator + +Function: ``check_openai_additional_utils`` + +This function demonstrates how to use the OpenAI client along with a custom utility function for generating responses from the model, based on the given query and utility function. + +.. code-block:: python + + def check_openai_additional_utils(func, model_kwargs): + """ + This function demonstrates the usage of the OpenAI client and a custom utility function + for generating responses from the LLM model, based on the given query in openai client. + + Parameters: + - func: A function that will be used to parse the chat completion (for custom parsing). + - model_kwargs: The additional model parameters (e.g., temperature, max_tokens) to be used in the model. + + Returns: + - output: The generated response from the model based on the query. + """ + + # Initialize the OpenAI client with a custom chat completion parser + openai_client = OpenAIClient(chat_completion_parser=func) + + # Define a sample query (user question) + query = "What is the capital of France?" + + # Set the model type to LLM (Large Language Model) + model_type = ModelType.LLM + + # Create the prompt by formatting the user query as a conversation + prompt = f"User: {query}\n" + + # Define any additional parameters needed for the model (e.g., the input string) + prompt_kwargs = { + "input_str": "What is the capital of France?", + } + + # Initialize the Generator with the OpenAI client and model parameters + generator = Generator(model_client=openai_client, model_kwargs=model_kwargs) + + # Execute the generator to get a response for the prompt (using the defined prompt_kwargs) + output = generator(prompt_kwargs=prompt_kwargs) + + # Return the generated output (response from the LLM) + return output + +Function: ``run_utils_functions`` + +This function runs a series of utility functions using different model configurations for generating responses. It demonstrates how to check OpenAI model outputs using various utility functions. + +.. code-block:: python + + def run_utils_functions(): + """ + This function runs a series of utility functions using different model + configurations for generating responses. It demonstrates how to check + OpenAI model outputs using various utility functions. 
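+    ``logprobs=True`` and ``n=2`` below are OpenAI chat-completion parameters:
+    they request token log-probabilities and two alternative completions,
+    which gives ``get_probabilities`` something to parse.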
+ """ + + # Define the model arguments for the probability-based function (with logprobs) + probability_model_kwargs = { + "model": "gpt-3.5-turbo", # Specify the model version + "logprobs": True, # Enable logprobs to get probability distributions for tokens + "n": 2, # Request 2 different completions for each query + } + + # Define general model arguments for most other functions + model_kwargs = { + "model": "gpt-3.5-turbo", # Specify the model version + "temperature": 0.5, # Control the randomness of responses (0 is deterministic) + "max_tokens": 100, # Set the maximum number of tokens (words) in the response + } + + # List of functions to run with corresponding model arguments + func_list = [ + [ + get_probabilities, + probability_model_kwargs, + ], # Function to get probabilities with specific kwargs + [ + get_first_message_content, + model_kwargs, + ], # Function to get first message content + [ + get_all_messages_content, + model_kwargs, + ], # Function to get all messages content in multi-chat scenarios + ] + + # Loop through each function and its corresponding arguments + for each_func in func_list: + # Check the function output using the specified arguments + result = check_openai_additional_utils(each_func[0], each_func[1]) + + # Print the function and result for debugging purposes + print(f"Function: {each_func[0].__name__}, Model Args: {each_func[1]}") + print(f"Result: {result}") + +Running the Utility Functions + +To execute the utility functions, we call the ``run_utils_functions()`` method, which runs the defined functions and prints their results. + +.. code-block:: python + + run_utils_functions() + +Purpose and Usage +These utilities (``get_first_message_content``, ``get_all_messages_content``, and ``get_probabilities``) allow users to extract specific information from the OpenAI LLM responses, such as individual message contents in a chat or the probability distribution over tokens. + + +Groq LLM Chat - Multichat Usage +------------------------------------------------- + +Note: Groq doesnt have embedder method to get embeddings like openai + +The following example demonstrates how to set up a multi-turn conversation with the Groq LLM using the ``GroqAPIClient``. + +.. code-block:: python + + from adalflow.components.model_client import GroqAPIClient + from adalflow.core.types import ModelType + from adalflow.utils import setup_env + from typing import List, Dict + +ChatConversation Class + +This class handles the conversation flow by interacting with the Groq model, keeping track of the conversation history, and generating responses. + +.. code-block:: python + + class ChatConversation: + def __init__(self): + """ + Initialize a new ChatConversation object. + - GroqAPIClient is used to interact with the Groq model. + - conversation_history keeps track of the conversation between the user and assistant. + - model_kwargs contains the model parameters like temperature and max tokens. + """ + self.groq_client = ( + GroqAPIClient() + ) # Initialize GroqAPIClient for model interaction + self.conversation_history: str = ( + "" # Initialize conversation history as an empty string + ) + self.model_kwargs = { + "model": "llama3-8b-8192", # Specify the model to use + "temperature": 0.5, # Set the temperature for response variability + "max_tokens": 100, # Limit the number of tokens in the response + } + + def add_user_message(self, message: str): + """ + Add a user message to the conversation history in the required format. 
+ The message is wrapped with tags for better processing by the assistant. + """ + self.conversation_history += ( + f" {message} " # Append user message to history + ) + + def add_assistant_message(self, message: str): + """ + Add an assistant message to the conversation history in the required format. + The message is wrapped with tags for better processing. + """ + self.conversation_history += ( + f" {message} " # Append assistant message to history + ) + + def get_response(self) -> str: + """ + Generate a response from the assistant based on the conversation history. + - Converts the conversation history and model kwargs into the format required by the Groq API. + - Calls the API to get the response. + - Parses and adds the assistant's reply to the conversation history. + """ + # Prepare the request for the Groq API, converting the inputs into the correct format + api_kwargs = self.groq_client.convert_inputs_to_api_kwargs( + input=self.conversation_history, # Use the conversation history as input + model_kwargs=self.model_kwargs, # Include model-specific parameters + model_type=ModelType.LLM, # Specify the model type (Large Language Model) + ) + print(f"api_kwargs: {api_kwargs}") # Log the API request parameters + + # Call the Groq model API to get the response + response = self.groq_client.call( + api_kwargs=api_kwargs, + model_type=ModelType.LLM, # Specify the model type again for clarity + ) + print("response: ", response) # Log the API response + + # Parse the response to extract the assistant's reply + response_text = self.groq_client.parse_chat_completion(response) + + # Add the assistant's message to the conversation history + self.add_assistant_message(response_text) + + # Return the assistant's response text + return response_text + +Example Multi-Turn Conversation + +The following function simulates a multi-turn conversation, where the user asks a series of questions and the assistant responds. It demonstrates how user inputs are processed, and responses are generated while maintaining the conversation history. + +.. code-block:: python + + def check_chat_conversation(): + """ + This function simulates a multi-turn conversation between a user and an assistant. + It demonstrates how user inputs are processed, and the assistant generates responses, + while maintaining the conversation history for each query. + """ + # Initialize the ChatConversation object + chat = ChatConversation() # This creates an instance of the ChatConversation class + + # Define a list of user questions for a multi-turn conversation + questions = [ + "What is the capital of France?", # First user question + "What is its population?", # Second user question + "Tell me about its famous landmarks", # Third user question + ] + + # Loop through each question and get the assistant's response + for question in questions: + # Print the current question from the user + print(f"\nUser: {question}") + + # Add the user's message to the conversation history + chat.add_user_message(question) + + # Get the assistant's response based on the conversation history + response = chat.get_response() + + # Print the assistant's response + print(f"Assistant: {response}") + + # After the conversation, print the full conversation history + print("\nFull Conversation History:") + print( + chat.conversation_history + ) # This will print all messages (user and assistant) in the conversation history + +Run the following to use groq_client multichat ability + +.. 
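note::
+
+   This assumes a valid Groq key is available to the client, e.g. as ``GROQ_API_KEY`` in your environment (a quick sanity check, mirroring the OpenAI setup earlier):
+
+   .. code-block:: python
+
+      import os
+
+      assert os.environ.get("GROQ_API_KEY"), "Set GROQ_API_KEY before running"
+
+.. 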
code-block:: python + + check_chat_conversation() + +Groq LLM Chat - Multichat Usage - Asynchronous +------------------------------------------------- + +This example demonstrates how to perform multi-turn conversations with the Groq LLM using asynchronous calls for each query. It uses Python's `asyncio` to handle multiple independent requests concurrently. + +.. code-block:: python + + import asyncio + from adalflow.components.model_client import GroqAPIClient + from adalflow.core.types import ModelType + from typing import List + +ChatConversation Class + +This class allows you to interact asynchronously with the Groq model. The get_response method fetches responses from the model for a single user input asynchronously. + +.. code-block:: python + + class ChatConversation: + def __init__(self): + # Using an asynchronous client for communication with GroqAPI + self.groq_client = GroqAPIClient() # Create an instance of GroqAPIClient + # Model configuration parameters (e.g., Llama model with 8b parameters and 8192 context length) + self.model_kwargs = { + "model": "llama3-8b-8192", # Llama model with specific size + "temperature": 0.5, # Degree of randomness in the model's responses + "max_tokens": 100, # Maximum number of tokens in the response + } + + async def get_response(self, message: str) -> str: + """Get response from the model for a single message asynchronously""" + + # Convert the user input message to the appropriate format for the Groq API + api_kwargs = self.groq_client.convert_inputs_to_api_kwargs( + input=message, # User's input message + model_kwargs=self.model_kwargs, # Model parameters + model_type=ModelType.LLM, # Model type for large language models (LLM) + ) + print(f"api_kwargs: {api_kwargs}") # Print the API arguments for debugging + + # Asynchronously call the Groq API with the provided API arguments + response = await self.groq_client.acall( + api_kwargs=api_kwargs, # Pass the API arguments + model_type=ModelType.LLM, # Specify the model type + ) + print("response: ", response) # Print the API response for debugging + + # Parse the response to extract the assistant's reply from the API response + response_text = self.groq_client.parse_chat_completion(response) + return response_text # Return the assistant's response text + +Example Asynchronous Multi-Turn Conversation + +The following function demonstrates how multiple independent questions are handled asynchronously. Each question is processed concurrently, and their responses are gathered using asyncio.gather. + +.. code-block:: python + + async def check_chat_conversations(): + # Create an instance of ChatConversation + chat = ChatConversation() + + # List of unrelated questions for independent async calls + questions = [ + "What is the capital of France?", + "Is dog a wild animal ?", + "Tell me about amazon forest", + ] + + # Run each question as an independent asynchronous task + tasks = [chat.get_response(question) for question in questions] + # Gather all the responses concurrently + responses = await asyncio.gather(*tasks) + + # Display each response alongside the question + for question, response in zip(questions, responses): + print(f"\nUser: {question}") + print(f"Assistant: {response}") + +To execute the function, run the following: + +.. 
code-block:: python + + # Run the asynchronous function if in a file + # asyncio.run(check_chat_conversations()) + + await check_chat_conversations() + +Groq LLM Chat - Multichat Usage - Benchmark sync() vs async() +----------------------------------------------------------------- + +This example demonstrates how to benchmark the synchronous ``.call()`` method versus the asynchronous ``.acall()`` method for making API calls using Groq. The benchmark compares the time taken to execute multiple API requests synchronously and asynchronously. + + +.. code-block:: python + + import asyncio + import time + from adalflow.components.model_client import ( + GroqAPIClient, + ) # Assuming GroqAPI with .call() and .acall() is available + from adalflow.core.types import ModelType + +Initialization + +The following code initializes the Groq client and sets up the sample prompt and model parameters for testing. + +.. code-block:: python + + # Initialize the Groq client + groq_client = GroqAPIClient() + + # Sample prompt for testing + prompt = "Tell me a joke." + + model_kwargs = {"model": "llama3-8b-8192", "temperature": 0.5, "max_tokens": 100} + +Benchmarking Synchronous `.call()` Method + +This function benchmarks the synchronous `.call()` method by calling the Groq API synchronously multiple times. + +.. code-block:: python + + # Synchronous function for benchmarking .call() + def benchmark_sync_call(api_kwargs, runs=10): + # List to store responses from each synchronous call + responses = [] + + # Record the start time for benchmarking + start_time = time.time() + + # Perform synchronous API calls in a loop + responses = [ + groq_client.call( # Calling the API synchronously + api_kwargs=api_kwargs, # Passing the API arguments + model_type=ModelType.LLM, # Defining the model type + ) + for _ in range(runs) # Repeat the call 'runs' times + ] + + # Record the end time after all calls are completed + end_time = time.time() + + # Print out the response from each synchronous call + for i, response in enumerate(responses): + print(f"sync call {i + 1} completed: {response}") + + # Print the total time taken for the synchronous benchmark + print(f"\nSynchronous benchmark completed in {end_time - start_time:.2f} seconds") + +Benchmarking Asynchronous ``.acall()`` Method + +This asynchronous function benchmarks the ``.acall()`` method by calling the Groq API asynchronously multiple times using asyncio.gather() to execute tasks concurrently. + +.. 
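note::
+
+   ``time.time()`` is good enough for a coarse benchmark like this; for finer-grained timing, ``time.perf_counter()`` is a drop-in alternative (an illustrative sketch):
+
+   .. code-block:: python
+
+      import time
+
+      start = time.perf_counter()
+      # ... run the calls to be measured ...
+      elapsed = time.perf_counter() - start
+
+.. 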
code-block:: python
+
+    # Asynchronous function for benchmarking .acall()
+    async def benchmark_async_acall(api_kwargs, runs=10):
+        # Record the start time for benchmarking
+        start_time = time.time()
+
+        # Create a list of tasks for asynchronous API calls
+        tasks = [
+            groq_client.acall(  # Calling the API asynchronously
+                api_kwargs=api_kwargs,  # Passing the API arguments
+                model_type=ModelType.LLM,  # Defining the model type
+            )
+            for _ in range(runs)  # Repeat the call 'runs' times
+        ]
+
+        # Await the completion of all tasks concurrently
+        responses = await asyncio.gather(
+            *tasks
+        )  # Gather all the responses from asynchronous calls
+
+        # Record the end time after all asynchronous calls are completed
+        end_time = time.time()
+
+        # Print out the response from each asynchronous call
+        for i, response in enumerate(responses):
+            print(f"Async call {i + 1} completed: {response}")
+
+        # Print the total time taken for the asynchronous benchmark
+        print(f"\nAsynchronous benchmark completed in {end_time - start_time:.2f} seconds")
+
+Running the Benchmarks
+
+The following code sets up the API arguments and runs both the synchronous and asynchronous benchmarks.
+
+.. code-block:: python
+
+    api_kwargs = groq_client.convert_inputs_to_api_kwargs(
+        input=prompt, model_kwargs=model_kwargs, model_type=ModelType.LLM
+    )
+
+    # Run both benchmarks
+    print("Starting synchronous benchmark...\n")
+    benchmark_sync_call(api_kwargs)
+
+    print("\nStarting asynchronous benchmark...\n")
+    await benchmark_async_acall(api_kwargs)
+
+Building a Custom Model Client
+-------------------------------------------------
+
+Building a synchronous API call
+
+Note: We use the OpenAI API as an example of how to build a custom model client
+in AdalFlow. Even though this client already exists in the AdalFlow repo, the
+code below is definitely a useful starting point for anyone who wants to build a
+custom model client.
+
+.. code-block:: python
+
+    # Building a simple custom third-party model client and using it
+    # convert_inputs_to_api_kwargs() is modified to make sure it follows the OpenAI prompt format, and the appropriate
+    # OpenAI API call is used in call()
+
+    import openai
+    from adalflow.core.model_client import ModelClient
+    from adalflow.core.types import ModelType, GeneratorOutput, EmbedderOutput
+    from openai.types import (
+        CreateEmbeddingResponse,
+    )
+    from adalflow.components.model_client.utils import parse_embedding_response
+
+This class defines the custom model client. The constructor initializes the client by calling the parent class's initializer (ModelClient), which is essential for the setup of the AdalFlow framework.
+
+.. code-block:: python
+
+    class SimpleCustomModelClient(ModelClient):
+        # Initialize the custom model client
+        def __init__(self):
+            # Call the parent class's initializer
+            super().__init__()
+            pass  # Placeholder for any initialization logic if needed in the future
+
+        # Method to convert input into API parameters for different model types (LLM or Embedder)
+        def convert_inputs_to_api_kwargs(
+            self, input=None, model_kwargs={}, model_type=ModelType.UNDEFINED
+        ):
+            """
+            Convert the inputs into API arguments based on the model type.
+
+            Args:
+                input (str): The input text to be processed.
+                model_kwargs (dict): Additional model parameters like temperature, max_tokens, etc.
+                model_type (ModelType): The type of model to use (LLM or Embedder).
+
+            Returns:
+                dict: API arguments formatted for the specified model type. 
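+
+            Example (illustrative values):
+                >>> client = SimpleCustomModelClient()
+                >>> client.convert_inputs_to_api_kwargs(
+                ...     input=[{"role": "user", "content": "Hi"}],
+                ...     model_kwargs={"model": "gpt-3.5-turbo", "temperature": 0.5, "max_tokens": 100},
+                ...     model_type=ModelType.LLM,
+                ... )
+                {'model': 'gpt-3.5-turbo', 'messages': [...], 'temperature': 0.5, 'max_tokens': 100}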
+ """ + if ( + model_type == ModelType.LLM + ): # If the model type is a large language model (LLM) + return { + "model": model_kwargs[ + "model" + ], # Set the model to use (e.g., GPT-3, GPT-4) + "messages": input, # Provide the input as the message + "temperature": model_kwargs[ + "temperature" + ], # Set the temperature (creativity of the response) + "max_tokens": model_kwargs[ + "max_tokens" + ], # Max tokens to generate in the response + } + elif model_type == ModelType.EMBEDDER: # If the model type is an embedder + return { + "model": model_kwargs["model"], # Model name for embedding + "input": [input], # Provide the input in a list format for embedding + } + else: + # Raise an error if the model type is unsupported + raise ValueError(f"model_type {model_type} is not supported") + + # Method to make the actual API call to OpenAI for either completions (LLM) or embeddings + def call(self, api_kwargs={}, model_type=ModelType.UNDEFINED): + """ + Call the appropriate OpenAI API method based on the model type (LLM or Embedder). + + Args: + api_kwargs (dict): Arguments to be passed to the API call. + model_type (ModelType): The type of model (LLM or Embedder). + + Returns: + Response: The API response from OpenAI. + """ + if model_type == ModelType.LLM: # If the model type is LLM (e.g., GPT-3, GPT-4) + return openai.chat.completions.create( + **api_kwargs + ) # Call the chat API for completion + elif model_type == ModelType.EMBEDDER: # If the model type is Embedder + return openai.embeddings.create(**api_kwargs) # Call the embedding API + else: + # Raise an error if an invalid model type is passed + raise ValueError(f"Unsupported model type: {model_type}") + + # Method to parse the response from a chat completion API call + def parse_chat_completion(self, completion): + """ + Parse the response from a chat completion API call into a custom output format. + + Args: + completion: The completion response from the OpenAI API. + + Returns: + GeneratorOutput: A custom data structure containing the parsed response. + """ + # Note: GeneratorOutput is a adalflow dataclass that contains the parsed completion data + return GeneratorOutput( + data=completion, # Store the raw completion data + error=None, # No error in this case + raw_response=str(completion), # Store the raw response as a string + ) + + # Method to parse the response from an embedding API call + def parse_embedding_response( + self, response: CreateEmbeddingResponse + ) -> EmbedderOutput: + """ + Parse the response from an embedding API call into a custom output format. + + Args: + response (CreateEmbeddingResponse): The response from the embedding API. + + Returns: + EmbedderOutput: A custom data structure containing the parsed embedding response. + """ + try: + # Attempt to parse the embedding response using a helper function + return parse_embedding_response(response) + except Exception as e: + # If parsing fails, return an error message with the raw response + return EmbedderOutput(data=[], error=str(e), raw_response=response) + +In below block, the custom model client is instantiated, and a query is defined for processing by both an LLM (like GPT-3.5) and an Embedder model. The API arguments are converted, and the call() method is used to fetch responses. Finally, both types of responses (LLM and Embedder) are parsed and printed. + +.. 
code-block:: python + + def build_custom_model_client(): + # Instantiate the custom model client (SimpleCustomModelClient) + custom_client = SimpleCustomModelClient() + + # Define the query for the model to process + query = "What is the capital of France?" + + # Set the model type for a Large Language Model (LLM) + model_type = ModelType.LLM + + # Prepare the message prompt as expected by the OpenAI chat API. + # This format is suitable for GPT-like models (e.g., gpt-3.5-turbo). + message_prompt = [ + { + "role": "user", # Define the user role in the conversation + "content": [ + { + "type": "text", # Specify that the input is a text type + "text": query, # The actual query to be processed by the model + } + ], + } + ] + + # Print message indicating the usage of the LLM model type + print("ModelType LLM") + + # Define additional model parameters like model name, temperature, and max tokens for LLM + model_kwargs = {"model": "gpt-3.5-turbo", "temperature": 0.5, "max_tokens": 100} + + # Convert the input message and model kwargs into the required API parameters + api_kwargs = custom_client.convert_inputs_to_api_kwargs( + input=message_prompt, model_kwargs=model_kwargs, model_type=model_type + ) + + # Print the API arguments that will be passed to the call method + print(f"api_kwargs: {api_kwargs}") + + # Call the LLM model using the prepared API arguments + result = custom_client.call(api_kwargs, ModelType.LLM) + + # Print the result of the LLM model call (response from OpenAI) + print(result) + + # Parse the chat completion response and output a more structured result + response_text = custom_client.parse_chat_completion(result) + + # Print the structured response from the chat completion + print(f"response_text: {response_text}") + + # Switch to using the Embedder model type + print("ModelType EMBEDDER") + + # Define model-specific parameters for the embedding model + model_kwargs = { + "model": "text-embedding-3-small", + "dimensions": 8, + "encoding_format": "float", + } + + # Convert the input query for the embedder model + api_kwargs = custom_client.convert_inputs_to_api_kwargs( + input=query, model_kwargs=model_kwargs, model_type=ModelType.EMBEDDER + ) + + # Print the API arguments that will be passed to the embedder model + print(f"embedder api_kwargs: {api_kwargs}") + + # Call the Embedder model using the prepared API arguments + result = custom_client.call(api_kwargs, ModelType.EMBEDDER) + + # Print the result of the Embedder model call (embedding response) + print(result) + + # Parse the embedding response and output a more structured result + response_text = custom_client.parse_embedding_response(result) + + # Print the structured response from the embedding model + print(f"response_text: {response_text}") + +This is the function call that triggers the execution of the custom model client, processing the defined query and displaying results for both LLM and Embedder. + +.. code-block:: python + + build_custom_model_client() .. admonition:: API reference :class: highlight diff --git a/docs/source/tutorials/output_parsers.rst b/docs/source/tutorials/output_parsers.rst index c619998b..0df19e09 100644 --- a/docs/source/tutorials/output_parsers.rst +++ b/docs/source/tutorials/output_parsers.rst @@ -1,7 +1,26 @@ +.. _components-output_parser_note: + +.. raw:: html + + + Parser ============= -Parser is the `interpreter` of the LLM output. +Parser is the `interpreter` of the LLM output. 
We have three types of parsers:
+
+- **String Parsers**: they simply convert the string to the desired data type. They are located at :ref:`core.string_parser`.
+- **Output Parsers**: they orchestrate the parsing and output-formatting (in YAML, JSON, and more) process. They are located at :ref:`components.output_parsers.outputs`. :class:`JsonOutputParser` and :class:`YamlOutputParser` can work with :ref:`DataClass` for structured output.
+- **DataClass Parser**: on top of `YamlOutputParser` and `JsonOutputParser`, :class:`DataClassParser` is the most convenient one to use with :ref:`DataClass` for structured output.
@@ -140,7 +159,44 @@ Thus, ``JsonOutputParser`` and ``YamlOutputParser`` both takes the following arg - ``data_class``: the ``DataClass`` type. - ``examples``: the examples of the data class instance if you want to show the examples in the prompt.
-- ``exclude``: the fields to exclude from both the data format and the examples.
+- ``exclude``: the fields to exclude from both the data format and the examples, a way to tell the ``format_instructions`` which fields of the data class are output fields.
+
+DataClass Parser
+~~~~~~~~~~~~~~~~~~~~
+To make things even easier for developers, we created :class:`DataClassParser`, which
+understands the `__input_fields__` and `__output_fields__` of the `DataClass`; it is especially helpful when working on a training dataset where we have both inputs and outputs.
+Users do not have to use `exclude/include` fields to specify the output fields; it automatically understands the output fields from the `DataClass` instance.
+
+Below is an overview of its key components and functionalities.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 20 60
+
+   * - Method
+     - Description
+     - Details
+   * - ``__init__(data_class: DataClass, return_data_class: bool = False, format_type: Literal["yaml", "json"] = "json")``
+     - Initializes the DataClassParser
+     - Takes a DataClass type, whether to return the DataClass instance after parsing, and the output format type (JSON or YAML).
+   * - ``get_input_format_str() -> str``
+     - Returns formatted instructions for input data
+     - Provides a string representation of the input fields defined in the DataClass.
+   * - ``get_output_format_str() -> str``
+     - Returns formatted instructions for output data
+     - Generates a schema string for the output fields of the DataClass.
+   * - ``get_input_str(input: DataClass) -> str``
+     - Formats the input data as a string
+     - Converts a DataClass instance to either JSON or YAML based on the specified format type.
+   * - ``get_task_desc_str() -> str``
+     - Returns the task description string
+     - Retrieves the task description associated with the DataClass, useful for context in LLM prompts.
+   * - ``get_examples_str(examples: List[DataClass], include: Optional[IncludeType] = None, exclude: Optional[ExcludeType] = None) -> str``
+     - Formats a list of example DataClass instances
+     - Generates a formatted string representation of examples, adhering to the specified ``include/exclude`` parameters.
+   * - ``call(input: str) -> Any``
+     - Parses the output string to the desired format and returns parsed output
+     - Handles both JSON and YAML parsing, converting to the corresponding DataClass if specified.
.. TODO: a summary table and a diagram @@ -148,7 +204,8 @@ Parser in Action ------------------ All of the parsers are quite straightforward to use.
-**BooleanParser**
+BooleanParser
+~~~~~~~~~~~~~~~~~~
.. 
code-block:: python @@ -181,7 +238,9 @@ The printout will be: Boolean parsers will not work for '1', '0', 'yes', 'no' as they are not the standard boolean values.
-**IntParser**
+
+IntParser
+~~~~~~~~~~~~~~~~~~
.. code-block:: python @@ -210,7 +269,9 @@ The printout will be: ``IntParser`` will return the integer value of the first number in the string, even if it is a float.
-**FloatParser**
+
+FloatParser
+~~~~~~~~~~~~~~~~~~
.. code-block:: python @@ -240,7 +301,9 @@ The printout will be: ``FloatParser`` will return the float value of the first number in the string, even if it is an integer.
-**ListParser**
+
+ListParser
+~~~~~~~~~~~~~~~~~~
.. code-block:: python @@ -263,7 +326,9 @@ The output will be: ['key', 2] [{'key': 'value'}, {'key': 'value'}]
-**JsonParser**
+
+JsonParser
+~~~~~~~~~~~~~~~~~~
Even though it can work on lists, it is better to only use it for dictionaries. @@ -294,7 +359,9 @@ The output will be: ['key', 2] [{'key': 'value'}, {'key': 'value'}]
-**YamlParser**
+
+YamlParser
+~~~~~~~~~~~~~~~~~~
Though it works almost on all of the previous examples, it is better to use it for yaml formatted dictionaries. @@ -344,7 +411,9 @@ And we will demonstrate how to use ``JsonOutputParser`` and ``YamlOutputParser`` user_example = User(id=1, name="John")
-**JsonOutputParser**
+
+JsonOutputParser
+~~~~~~~~~~~~~~~~~~
Here is how to use ``JsonOutputParser``: @@ -416,7 +485,9 @@ The output will be: {'id': 2, 'name': 'Jane'}
-**YamlOutputParser**
+
+YamlOutputParser
+~~~~~~~~~~~~~~~~~~
The steps are totally the same as the ``JsonOutputParser``. @@ -496,6 +567,147 @@ The output will be: .. .. [1] Jinja2: https://jinja.palletsprojects.com/en/3.1.x/ .. .. [2] Llama3 special tokens: https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
+DataclassParser in Action
+--------------------------
+
+First, let's create a new data class with both input and output fields.
+
+.. code-block:: python
+
+    @dataclass
+    class SampleDataClass(DataClass):
+        description: str = field(metadata={"desc": "A sample description"})
+        category: str = field(metadata={"desc": "Category of the sample"})
+        value: int = field(metadata={"desc": "A sample integer value"})
+        status: str = field(metadata={"desc": "Status of the sample"})
+
+        __input_fields__ = [
+            "description",
+            "category",
+        ]  # Define which fields are input fields
+        __output_fields__ = ["value", "status"]  # Define which fields are output fields
+
+
+Now, let's create a parser that will use the `SampleDataClass` to parse the output JSON string back to the data class instance.
+
+.. code-block:: python
+
+    from adalflow.components.output_parsers import DataClassParser
+
+    parser = DataClassParser(data_class=SampleDataClass, return_data_class=True, format_type="json")
+
+Let's view the structure of the parser using ``print(parser)``.
+
+The output will be:
+
+.. code-block::
+
+    DataClassParser(
+        data_class=SampleDataClass, format_type=json, return_data_class=True, input_fields=['description', 'category'], output_fields=['value', 'status']
+        (_output_processor): JsonParser()
+        (output_format_prompt): Prompt(
+            template: Your output should be formatted as a standard JSON instance with the following schema:
+            ```
+            {{schema}}
+            ```
+            -Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!
+            -Use double quotes for the keys and string values.
+            -DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the JSON output. 
+        -Follow the JSON formatting conventions., prompt_variables: ['schema']
+      )
+    )
+
+You can get the output and input format strings using the following methods:
+
+.. code-block:: python
+
+    print(parser.get_input_format_str())
+    print(parser.get_output_format_str())
+
+The output for the output format string will be:
+
+.. code-block::
+
+    Your output should be formatted as a standard JSON instance with the following schema:
+    ```
+    {
+        "value": " (int) (required)",
+        "status": " (str) (required)"
+    }
+    ```
+    -Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!
+    -Use double quotes for the keys and string values.
+    -DO NOT mistaken the "properties" and "type" in the schema as the actual fields in the JSON output.
+    -Follow the JSON formatting conventions.
+
+The input format string will be:
+
+.. code-block::
+
+    {
+        "description": " (str) (required)",
+        "category": " (str) (required)"
+    }
+
+Convert a JSON string back into a data class instance:
+
+.. code-block:: python
+
+    user_input = '{"description": "Parsed description", "category": "Sample Category", "value": 100, "status": "active"}'
+    parsed_instance = parser.call(user_input)
+
+    print(parsed_instance)
+
+The output will be:
+
+.. code-block:: python
+
+    SampleDataClass(description='Parsed description', category='Sample Category', value=100, status='active')
+
+Now, let's format a list of examples with ``get_examples_str``:
+
+.. code-block:: python
+
+    samples = [
+        SampleDataClass(
+            description="Sample description",
+            category="Sample category",
+            value=100,
+            status="active",
+        ),
+        SampleDataClass(
+            description="Another description",
+            category="Another category",
+            value=200,
+            status="inactive",
+        ),
+    ]
+
+    examples_str = parser.get_examples_str(examples=samples)
+    print(examples_str)
+
+The output will be:
+
+.. code-block:: python
+
+    examples_str:
+    {
+        "description": "Sample description",
+        "category": "Sample category",
+        "value": 100,
+        "status": "active"
+    }
+    __________
+    {
+        "description": "Another description",
+        "category": "Another category",
+        "value": 200,
+        "status": "inactive"
+    }
+    __________
+
+
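+Finally, the format strings above are meant to be interpolated into an LLM prompt.
+Here is a minimal sketch of wiring the parser into a :class:`core.generator.Generator`;
+the template wording and model settings are illustrative assumptions, not a prescribed
+setup, and ``parser`` is the instance created above:
+
+.. code-block:: python
+
+    import adalflow as adal
+    from adalflow.components.model_client.openai_client import OpenAIClient
+
+    template = r"""<SYS>{{output_format_str}}</SYS>
+    {{input_str}}"""
+
+    generator = adal.Generator(
+        model_client=OpenAIClient(),  # assumes OPENAI_API_KEY is set in the environment
+        model_kwargs={"model": "gpt-3.5-turbo"},
+        template=template,
+        prompt_kwargs={"output_format_str": parser.get_output_format_str()},
+        output_processors=parser,  # parse the raw completion back into SampleDataClass
+    )
+    # at call time, supply the remaining prompt variable, e.g.:
+    # output = generator(prompt_kwargs={"input_str": "<the formatted input section>"})
 
 .. admonition:: API References
    :class: highlight

@@ -507,3 +719,5 @@ The output will be:
    - :class:`components.output_parsers.outputs.OutputParser`
    - :class:`components.output_parsers.outputs.BooleanOutputParser`
    - :class:`components.output_parsers.outputs.ListOutputParser`
+   - :class:`components.output_parsers.dataclass_parser.DataClassParser`
+   - :class:`core.base_data_class.DataClass`
diff --git a/docs/source/tutorials/text_splitter.rst b/docs/source/tutorials/text_splitter.rst
index 60541dff..4e7da43a 100644
--- a/docs/source/tutorials/text_splitter.rst
+++ b/docs/source/tutorials/text_splitter.rst
@@ -1,3 +1,15 @@
+.. raw:: html
+
+
 .. _tutorials-text_splitter:
diff --git a/docs/source/use_cases/question_answering.rst b/docs/source/use_cases/question_answering.rst
index 0b730400..618ad88d 100644
--- a/docs/source/use_cases/question_answering.rst
+++ b/docs/source/use_cases/question_answering.rst
@@ -1,7 +1,7 @@
 .. raw:: html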
- 
+ 
     Try Quickstart in Colab
     

@@ -396,7 +396,7 @@ Here’s the minimum code required to get started on evaluating the task pipelin

     def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:
         y_label = -1
-        if y_pred and y_pred.data:
+        if y_pred is not None and y_pred.data is not None:  # `if y_pred and y_pred.data:` would drop a valid prediction of 0, since bool(0) is False
             y_label = y_pred.data
         return self.eval_fn, {"y": y_label, "y_gt": sample.answer}
diff --git a/docs/source/use_cases/question_answering_word_sort.rst b/docs/source/use_cases/question_answering_word_sort.rst
index a349f6dd..4b57a5b4 100644
--- a/docs/source/use_cases/question_answering_word_sort.rst
+++ b/docs/source/use_cases/question_answering_word_sort.rst
@@ -375,7 +375,7 @@ Here’s the minimum code required to get started on evaluating the task pipelin
         self, sample: Example, y_pred: adal.GeneratorOutput
     ) -> float:
         y_label = -1
-        if y_pred and y_pred.data:
+        if y_pred is not None and y_pred.data is not None:  # `if y_pred and y_pred.data:` would drop a valid prediction of 0, since bool(0) is False
             y_label = y_pred.data
         return self.eval_fn(y=y_label, y_gt=sample.answer)
diff --git a/notebooks/README.md b/notebooks/README.md
index 2e1d4f1c..3d2e6e94 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -19,6 +19,11 @@ The template consists of three parts:
 2. Content section of your notebook. Link to Next that users can look at.
 3. Issues and Feedback.

+## If you want to use an IPython kernel in .ipynb to test notebooks
+
+You can use the following command to install the kernel at the root of the project:
+
+```bash
+poetry run python -m ipykernel install --user --name my-project-kernel
+```

 ## If you need to use dev api
diff --git a/notebooks/adalflow_colab_template.ipynb b/notebooks/adalflow_colab_template.ipynb
index 480d5b1a..191bbf08 100644
--- a/notebooks/adalflow_colab_template.ipynb
+++ b/notebooks/adalflow_colab_template.ipynb
@@ -5,7 +5,7 @@
   "metadata": {},
   "source": [
    "# πŸ€— Welcome to AdalFlow!\n",
-   "## The PyTorch library to auto-optimize any LLM task pipelines\n",
+   "## The library to build & auto-optimize any LLM task pipelines\n",
    "\n",
    "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! ⭐ Star us on Github ⭐\n",
    "\n",
@@ -20,6 +20,10 @@
    "\n",
    "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n",
    "\n",
+   "# Author\n",
+   "\n",
+   "This notebook was created by community contributor [Name](Replace_to_github_or_other_social_account).\n",
+   "\n",
    "# Outline\n",
    "\n",
    "This is a quick introduction of what AdalFlow is capable of. We will cover:\n",
diff --git a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
index 017363cd..ac7e3cbf 100644
--- a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
+++ b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
@@ -1,8119 +1,8120 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "VVSOpjzJl_cx"
+    },
+    "source": [
+     "# πŸ€— Welcome to AdalFlow!\n",
+     "## The library to build & auto-optimize any LLM task pipelines\n",
+     "\n",
+     "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 If you have any questions or concerns, [come talk to us on Discord](https://discord.gg/ezzszrRZvT); we're always here to help!\n",
+     "\n",
+     "\n",
+     "# Quick Links\n",
+     "\n",
+     "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n",
+     "\n",
+     "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n",
+     "\n",
+     "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n",
+     "\n",
+     "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n",
+     "\n",
+     "# Outline\n",
+     "\n",
+     "*Note: As training can consume tokens quickly, and the notebook runtime resets every time you use it, it may be better to learn training in your local editor.*\n",
+     "\n",
+     "This is a quick end-to-end introduction to AdalFlow on a question-answering use case:\n",
+     "\n",
+     "* Trainable task pipeline with trainable parameters\n",
+     "* Create an AdalComponent for your task pipeline\n",
+     "* Use Trainer to diagnose, debug, and train\n",
+     "\n",
+     "You can find all source code here: https://github.com/SylphAI-Inc/AdalFlow/tree/main/use_cases/question_answering/bhh_object_count\n",
+     "\n",
+     "**Here is the more detailed tutorial for this code: https://adalflow.sylph.ai/use_cases/question_answering.html**\n",
+     "\n",
+     "\n",
+     "# Installation\n",
+     "\n",
+     "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq`, and `faiss` (CPU version) from the extra packages.\n",
+     "\n",
+     "   ```bash\n",
+     "   pip install adalflow[openai,groq,faiss-cpu]\n",
+     "   ```\n",
+     "2. 
Setup `openai` and `groq` API key in the environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "THTvmhjgfiHE" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,datasets]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/", + "height": 35 }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "id": "nJteJKsNrpcu", + "outputId": "d9f7b4d0-d11c-480d-d858-bf9022c18998" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'0.2.0'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import adalflow as adal\n", + "\n", + "adal.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KapUyHMM07pJ" + }, + "source": [ + "## Set Environment Variables\n", + "\n", + "Run the following code and pass your api key.\n", + "\n", + "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n", + "\n", + "*Go to [OpenAI](https://platform.openai.com/docs/introduction) and [Groq](https://console.groq.com/docs/) to get API keys if you don't already have.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "id": "ONfzF9Puzdd_", + "outputId": "6a815e21-ab99-463e-c53b-e39ca2ce8f3f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please enter your OpenAI API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n", + "Please enter your GROQ API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n", + "API keys have been set.\n" + ] } + ], + "source": [ + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key, simplly press Enter if you don't have one: \")\n", + "\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# πŸ€— Welcome to AdalFlow!\n", - "## The PyTorch library to auto-optimize any LLM task pipelines\n", - "\n", - "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help!\n", - "\n", - "\n", - "# Quick Links\n", - "\n", - "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", - "\n", - "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", - "\n", - "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", - "\n", - "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", - "\n", - "# Outline\n", - "\n", - "*Note: As training can consume tokens fast, and the notebook runtime will reset everytime you use, it might be better for you to learn training in your local 
editor.*\n", - "\n", - "This is a quick introduction of AdalFlow on question answering use case end to end\n", - "\n", - "* Trainable Task pipeline with trainable parameters\n", - "* Create AdalComponent for your task pipeline\n", - "* Use Trainer to diagnose, debug, and to train.\n", - "\n", - "You can find all source code here: https://github.com/SylphAI-Inc/AdalFlow/tree/main/use_cases/question_answering/bhh_object_count\n", - "\n", - "**Here is the more detailed tutorial for the code here: https://adalflow.sylph.ai/use_cases/question_answering.html**\n", - "\n", - "\n", - "# Installation\n", - "\n", - "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq`, and `faiss`(cpu version) from the extra packages.\n", - "\n", - " ```bash\n", - " pip install adalflow[openai,groq,faiss-cpu]\n", - " ```\n", - "2. Setup `openai` and `groq` API key in the environment variables" - ], - "metadata": { - "id": "VVSOpjzJl_cx" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "THTvmhjgfiHE" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,datasets]\n", - "\n", - "clear_output()" - ] + { + "cell_type": "markdown", + "metadata": { + "id": "SfGS7iddtfpj" + }, + "source": [ + "\n", + "\n", + "# πŸ˜‡ Trainable Task Pipeline\n", + "\n", + "We will create a task pipeline consists of a generator, with a customzied template, a customized output parser.\n", + "\n", + "Different from our other pipelines where the `prompt_kwargs` values are strings, but here we will use ``Parameter``. And we will set up two parameter, one is of ``ParameterType.PROMPT`` and the other of type ``ParameterType.DEMOS``. The first one will be trained by text-grad and the second will be trained by boostrap few shot optimizer.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nHnvAbO-pXUq" + }, + "outputs": [], + "source": [ + "import adalflow as adal\n", + "import re\n", + "from typing import Dict, Union\n", + "import adalflow as adal\n", + "from adalflow.optim.types import ParameterType\n", + "\n", + "\n", + "@adal.fun_to_component\n", + "def parse_integer_answer(answer: str):\n", + " \"\"\"A function that parses the last integer from a string using regular expressions.\"\"\"\n", + " try:\n", + " # Use regular expression to find all sequences of digits\n", + " numbers = re.findall(r\"\\d+\", answer)\n", + " if numbers:\n", + " # Get the last number found\n", + " answer = int(numbers[-1])\n", + " else:\n", + " answer = -1\n", + " except ValueError:\n", + " answer = -1\n", + "\n", + " return answer\n", + "\n", + "\n", + "few_shot_template = r\"\"\"\n", + "{{system_prompt}}\n", + "{# Few shot demos #}\n", + "{% if few_shot_demos is not none %}\n", + "Here are some examples:\n", + "{{few_shot_demos}}\n", + "{% endif %}\n", + "\n", + "\n", + "{{input_str}}\n", + "\n", + "\"\"\"\n", + "\n", + "class ObjectCountTaskPipeline(adal.Component):\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", + " super().__init__()\n", + "\n", + " system_prompt = adal.Parameter(\n", + " data=\"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\",\n",
+    "            role_desc=\"To give task instruction to the language model in the system prompt\",\n",
+    "            requires_opt=True,\n",
+    "            param_type=ParameterType.PROMPT,\n",
+    "        )\n",
+    "        few_shot_demos = adal.Parameter(\n",
+    "            data=None,\n",
+    "            role_desc=\"To provide few shot demos to the language model\",\n",
+    "            requires_opt=True,  # Changed to True for few-shot learning\n",
+    "            param_type=ParameterType.DEMOS,\n",
+    "        )\n",
+    "\n",
+    "        self.llm_counter = adal.Generator(\n",
+    "            model_client=model_client,\n",
+    "            model_kwargs=model_kwargs,\n",
+    "            template=few_shot_template,\n",
+    "            prompt_kwargs={\n",
+    "                \"system_prompt\": system_prompt,\n",
+    "                \"few_shot_demos\": few_shot_demos,\n",
+    "            },\n",
+    "            output_processors=parse_integer_answer,\n",
+    "            use_cache=True,\n",
+    "        )\n",
+    "\n",
+    "    def call(\n",
+    "        self, question: str, id: str = None\n",
+    "    ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n",
+    "        output = self.llm_counter(prompt_kwargs={\"input_str\": question}, id=id)\n",
+    "        return output\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "AvZJjdzZa0cT"
+   },
+   "source": [
+    "Next, we will run this pipeline in both train and eval mode.\n",
+    "\n",
+    "#### Eval mode with GeneratorOutput\n",
+    "\n",
+    "Eval mode will output ``GeneratorOutput``.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Gks3yS8hcR6_"
+   },
+   "source": [
+    "\n",
+    "#### Train mode with a different form of output\n",
+    "\n",
+    "Train mode will return a ``Parameter``, whose `data` field holds the `raw_response` from the ``GeneratorOutput``; we put the full ``GeneratorOutput`` in the parameter's ``full_response``.\n",
+    "\n",
+    "As the `data` field of the `Parameter` communicates directly with the Optimizer, which is an LLM itself, it is better for it to see the exact string response rather than the parsed one.\n",
+    "\n",
+    "Later you will see that we also use the parameter's ``eval_input`` to communicate with the `LossFunction`, as that needs the parsed final output.\n",
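+    "\n",
+    "As a sketch of the contrast (`task_pipeline` and `question` are defined in the surrounding cells):\n",
+    "\n",
+    "```python\n",
+    "task_pipeline.eval()  # eval mode\n",
+    "answer = task_pipeline(question, id=\"1\")  # -> adal.GeneratorOutput\n",
+    "\n",
+    "task_pipeline.train()  # train mode\n",
+    "answer = task_pipeline(question, id=\"1\")  # -> adal.Parameter\n",
+    "answer.data  # the raw string response\n",
+    "answer.full_response  # the underlying GeneratorOutput\n",
+    "```"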
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "eqQSFnZOpfWJ", + "outputId": "05b5fc83-09d1-45f4-aacc-6d460fbdd7bd" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "import adalflow as adal\n", - "\n", - "adal.__version__" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 35 - }, - "id": "nJteJKsNrpcu", - "outputId": "d9f7b4d0-d11c-480d-d858-bf9022c18998" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'0.2.0'" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - } - }, - "metadata": {}, - "execution_count": 2 - } - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" + ] }, { - "cell_type": "markdown", - "source": [ - "## Set Environment Variables\n", - "\n", - "Run the following code and pass your api key.\n", - "\n", - "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n", - "\n", - "*Go to [OpenAI](https://platform.openai.com/docs/introduction) and [Groq](https://console.groq.com/docs/) to get API keys if you don't already have.*" - ], - "metadata": { - "id": "KapUyHMM07pJ" - } - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "\n", - "from getpass import getpass\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", - "\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - "\n", - "print(\"API keys have been set.\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ONfzF9Puzdd_", - "outputId": "6a815e21-ab99-463e-c53b-e39ca2ce8f3f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Please enter your OpenAI API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n", - "Please enter your GROQ API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n", - "API keys have been set.\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", + "ObjectCountTaskPipeline(\n", + " (llm_counter): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + ")\n" + ] + } + ], + "source": [ + "from adalflow.components.model_client.openai_client import OpenAIClient\n", + "from adalflow.components.model_client.groq_client import GroqAPIClient\n", + "\n", + "\n", + "if len(os.environ['OPENAI_API_KEY']) > 1:\n", + " gpt_3_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"max_tokens\": 2000,\n", + " \"temperature\": 0.0,\n", + " \"top_p\": 0.99,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": None,\n", + " },\n", + " }\n", + " gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-4o\",\n", + " \"max_tokens\": 4000,\n", + " \"temperature\": 0.0,\n", + " \"top_p\": 0.99,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": None,\n", + " },\n", + " }\n", + "\n", + "if len(os.environ['GROQ_API_KEY']) > 1:\n", + " llama_3_1_model ={\n", + " \"model_client\": GroqAPIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"llama-3.1-8b-instant\"\n", + " }\n", + " }\n", + "\n", + "\n", + "question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?\"\n", + "task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n", + "print(task_pipeline)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "DE1xNdYvcXw8", + "outputId": "25844c2a-5d4c-4c68-8ca5-38b79ca5b398" + }, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "\n", - "\n", - "# πŸ˜‡ Trainable Task Pipeline\n", - "\n", - "We will create a task pipeline consists of a generator, with a customzied template, a customized output parser.\n", - "\n", - "Different from our other pipelines where the `prompt_kwargs` values are strings, but here we will use ``Parameter``. And we will set up two parameter, one is of ``ParameterType.PROMPT`` and the other of type ``ParameterType.DEMOS``. 
The first one will be trained by text-grad and the second will be trained by boostrap few shot optimizer.\n", - "\n", - "\n" - ], - "metadata": { - "id": "SfGS7iddtfpj" - } - }, - { - "cell_type": "code", - "source": [ - "import adalflow as adal\n", - "import re\n", - "from typing import Dict, Union\n", - "import adalflow as adal\n", - "from adalflow.optim.types import ParameterType\n", - "\n", - "\n", - "@adal.fun_to_component\n", - "def parse_integer_answer(answer: str):\n", - " \"\"\"A function that parses the last integer from a string using regular expressions.\"\"\"\n", - " try:\n", - " # Use regular expression to find all sequences of digits\n", - " numbers = re.findall(r\"\\d+\", answer)\n", - " if numbers:\n", - " # Get the last number found\n", - " answer = int(numbers[-1])\n", - " else:\n", - " answer = -1\n", - " except ValueError:\n", - " answer = -1\n", - "\n", - " return answer\n", - "\n", - "\n", - "few_shot_template = r\"\"\"\n", - "{{system_prompt}}\n", - "{# Few shot demos #}\n", - "{% if few_shot_demos is not none %}\n", - "Here are some examples:\n", - "{{few_shot_demos}}\n", - "{% endif %}\n", - "\n", - "\n", - "{{input_str}}\n", - "\n", - "\"\"\"\n", - "\n", - "class ObjectCountTaskPipeline(adal.Component):\n", - " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", - " super().__init__()\n", - "\n", - " system_prompt = adal.Parameter(\n", - " data=\"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\",\n", - " role_desc=\"To give task instruction to the language model in the system prompt\",\n", - " requires_opt=True,\n", - " param_type=ParameterType.PROMPT,\n", - " )\n", - " few_shot_demos = adal.Parameter(\n", - " data=None,\n", - " role_desc=\"To provide few shot demos to the language model\",\n", - " requires_opt=True, # Changed to True for few-shot learning\n", - " param_type=ParameterType.DEMOS,\n", - " )\n", - "\n", - " self.llm_counter = adal.Generator(\n", - " model_client=model_client,\n", - " model_kwargs=model_kwargs,\n", - " template=few_shot_template,\n", - " prompt_kwargs={\n", - " \"system_prompt\": system_prompt,\n", - " \"few_shot_demos\": few_shot_demos,\n", - " },\n", - " output_processors=parse_integer_answer,\n", - " use_cache=True,\n", - " )\n", - "\n", - " def call(\n", - " self, question: str, id: str = None\n", - " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", - " output = self.llm_counter(prompt_kwargs={\"input_str\": question}, id=id)\n", - " return output\n", - "\n", - "\n" - ], - "metadata": { - "id": "nHnvAbO-pXUq" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Next, we will run this pipeline in both train and eval mode.\n", - "\n", - "#### Eval mode with GeneratorOutput\n", - "\n", - "Eval mode will output ``GeneratorOutput``.\n", - "\n" - ], - "metadata": { - "id": "AvZJjdzZa0cT" - } - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "#### Train mode with different form of output\n", - "\n", - "Train mode will return ``Parameter``, where the `data` field will be the `raw_response`` from the GeneratorOutput, and we put the full GeneratorOutput at the ``full_response`` in the parameter.\n", - "\n", - "As the `data` field of the `Parameter` directly communicate with the Optimizer, which are an LLM itself, its better than they understand exactly the string response itself instead of the parsed one.\n", - "\n", - "Later you 
will see that we also use ``eval_input`` of the parameter to communicate with the `LossFunction` as that need the parsed final output." - ], - "metadata": { - "id": "Gks3yS8hcR6_" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.components.model_client.openai_client import OpenAIClient\n", - "from adalflow.components.model_client.groq_client import GroqAPIClient\n", - "\n", - "\n", - "gpt_3_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"max_tokens\": 2000,\n", - " \"temperature\": 0.0,\n", - " \"top_p\": 0.99,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": None,\n", - " },\n", - "}\n", - "\n", - "llama_3_1_model ={\n", - " \"model_client\": GroqAPIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"llama-3.1-8b-instant\"\n", - " }\n", - "}\n", - "\n", - "gpt_4o_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-4o\",\n", - " \"max_tokens\": 4000,\n", - " \"temperature\": 0.0,\n", - " \"top_p\": 0.99,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": None,\n", - " },\n", - "}\n", - "\n", - "\n", - "question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?\"\n", - "task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n", - "print(task_pipeline)\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "eqQSFnZOpfWJ", - "outputId": "05b5fc83-09d1-45f4-aacc-6d460fbdd7bd" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", - "ObjectCountTaskPipeline(\n", - " (llm_counter): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - ")\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "GeneratorOutput(id='1', data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. 
\\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n" + ] + } + ], + "source": [ + "answer = task_pipeline(question, id=\"1\")\n", + "print(answer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "AGUlUsGxcaby", + "outputId": "8c8588fe-2994-4d9e-c2d1-26453141f43f" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "answer = task_pipeline(question, id=\"1\")\n", - "print(answer)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "DE1xNdYvcXw8", - "outputId": "25844c2a-5d4c-4c68-8ca5-38b79ca5b398" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "GeneratorOutput(id='1', data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Parameter(name=Generator_output, requires_opt=True, param_type=generator_output (The output of the generator.), role_desc=Output from (llm) Generator, data=To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \n", + "\n", + "You have:\n", + "- Flute\n", + "- Piano\n", + "- Trombone\n", + "- Violin\n", + "- Accordion\n", + "- Clarinet\n", + "- Drum\n", + "- Trumpet\n", + "\n", + "Counting each of these instruments, we get a total of 8 musical instruments.\n", + "\n", + "Answer: 8, predecessors={Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={})}, gradients=[], raw_response=None, input_args={'prompt_kwargs': {'system_prompt': Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'few_shot_demos': Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'input_str': 'I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?'}, 'model_kwargs': {'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, traces={})\n", + "full_response: GeneratorOutput(id=None, data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n" + ] + } + ], + "source": [ + "# set it to train mode\n", + "task_pipeline.train()\n", + "answer = task_pipeline(question, id=\"1\")\n", + "print(answer)\n", + "print(f\"full_response: {answer.full_response}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YDAiuFzcr4YA" + }, + "outputs": [], + "source": [ + "!pip install datasets\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-Gvfcy2IcgWx" + }, + "source": [ + "### Load Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AYBIGsIHpjMe" + }, + "outputs": [], + "source": [ + "from adalflow.datasets.big_bench_hard import BigBenchHard\n", + "from adalflow.utils.data import subset_dataset\n", + "\n", + "def load_datasets(max_samples: int = None):\n", + " \"\"\"Load the dataset\"\"\"\n", + " train_data = BigBenchHard(split=\"train\")\n", + " val_data = BigBenchHard(split=\"val\")\n", + " test_data = BigBenchHard(split=\"test\")\n", + "\n", + " # Limit the number of samples\n", + " if max_samples:\n", + " train_data = subset_dataset(train_data, max_samples)\n", + " val_data = subset_dataset(val_data, max_samples)\n", + " test_data = subset_dataset(test_data, max_samples)\n", + "\n", + " return train_data, val_data, test_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "asw-pJrid8ly", + "outputId": "31807c34-0de9-45e5-ebdd-778aa5313802" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# set it to train mode\n", - "task_pipeline.train()\n", - "answer = task_pipeline(question, id=\"1\")\n", - "print(answer)\n", - "print(f\"full_response: {answer.full_response}\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "AGUlUsGxcaby", - "outputId": "8c8588fe-2994-4d9e-c2d1-26453141f43f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Parameter(name=Generator_output, requires_opt=True, param_type=generator_output (The output of the generator.), role_desc=Output from (llm) Generator, data=To find the total 
number of musical instruments you have, you simply need to count the individual instruments you listed. \n", - "\n", - "You have:\n", - "- Flute\n", - "- Piano\n", - "- Trombone\n", - "- Violin\n", - "- Accordion\n", - "- Clarinet\n", - "- Drum\n", - "- Trumpet\n", - "\n", - "Counting each of these instruments, we get a total of 8 musical instruments.\n", - "\n", - "Answer: 8, predecessors={Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={})}, gradients=[], raw_response=None, input_args={'prompt_kwargs': {'system_prompt': Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'few_shot_demos': Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'input_str': 'I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?'}, 'model_kwargs': {'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, traces={})\n", - "full_response: GeneratorOutput(id=None, data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Example(id='d3f33ded-170a-4b87-9b0b-987d5fb7b817', question='I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?', answer='4')\n" + ] + } + ], + "source": [ + "# check the datasets\n", + "\n", + "train_data, val_data, test_data = load_datasets(max_samples=2)\n", + "print(train_data[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VAVtXE9xeEHt" + }, + "source": [ + "### Soft link to AdalFlow default file path\n", + "\n", + "Lets' match the default to the current project, so that you can see the downloaded data and later the checkpoints of the training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1SaKH6dkeWus" + }, + "outputs": [], + "source": [ + "! ln -s /root/.adalflow /content/adalflow\n", + "\n", + "# go to files then you will see a folder named as adalflow" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YWZzOvAHenME" + }, + "source": [ + "# 😊 AdalComponent to define everything we need to train\n", + "\n", + "1. We need `backward_engine_model_config`` for ``backward_engine`` to compute gradient.\n", + "\n", + "2. We need ``text_optimizer_model_config`` for the `text optimizer` for propose new prompts.\n", + "\n", + "3. For the demo optimizer, we need a `teacher_model_config` to config a teacher generator, in this case, it is the `llm_counter`. The teacher will share the same prompt with the `llm_counter` but you can use a more advanced model.\n", + "\n", + "In general, we should have all of these parts to use a more advanced model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9QoNoMWD0rgV" + }, + "source": [ + "## πŸ§‘ Diagnose\n", + "\n", + "Diagnose is more of an evaluation, but with detailed logs so that you can manually inspect the wrong output.\n", + "\n", + "This one shows the minimum config you need to get the `diagnose` work." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6mi7lM3U24Eg" + }, + "outputs": [], + "source": [ + "from adalflow.datasets.types import Example\n", + "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n", + "\n", + "\n", + "class ObjectCountAdalComponent(adal.AdalComponent):\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", + " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n", + " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", + " super().__init__(task=task, eval_fn=eval_fn)\n", + "\n", + " def prepare_task(self, sample: Example):\n", + " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", + "\n", + " def prepare_eval(\n", + " self, sample: Example, y_pred: adal.GeneratorOutput\n", + " ) -> float:\n", + " y_label = -1\n", + " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", + " y_label = y_pred.data\n", + " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eliPeVeM2wcP" + }, + "outputs": [], + "source": [ + "def diagnose(\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + ") -> Dict:\n", + "\n", + " trainset, valset, testset = load_datasets()\n", + " # use max_samples=10 to test the code\n", + "\n", + " adal_component = ObjectCountAdalComponent(model_client, model_kwargs)\n", + " trainer = adal.Trainer(adaltask=adal_component)\n", + " trainer.diagnose(dataset=trainset, split=\"train\")\n", + " trainer.diagnose(dataset=valset, split=\"val\")\n", + " trainer.diagnose(dataset=testset, split=\"test\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "nKl9clcb3dFj", + "outputId": "676fbb96-c70b-40ab-ea15-93ade1aa9e66" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "!pip install datasets\n", - "clear_output()" - ], - "metadata": { - "id": "YDAiuFzcr4YA" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Load Datasets" - ], - 
"metadata": { - "id": "-Gvfcy2IcgWx" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.datasets.big_bench_hard import BigBenchHard\n", - "from adalflow.utils.data import subset_dataset\n", - "\n", - "def load_datasets(max_samples: int = None):\n", - " \"\"\"Load the dataset\"\"\"\n", - " train_data = BigBenchHard(split=\"train\")\n", - " val_data = BigBenchHard(split=\"val\")\n", - " test_data = BigBenchHard(split=\"test\")\n", - "\n", - " # Limit the number of samples\n", - " if max_samples:\n", - " train_data = subset_dataset(train_data, max_samples)\n", - " val_data = subset_dataset(val_data, max_samples)\n", - " test_data = subset_dataset(test_data, max_samples)\n", - "\n", - " return train_data, val_data, test_data\n" - ], - "metadata": { - "id": "AYBIGsIHpjMe" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# check the datasets\n", - "\n", - "train_data, val_data, test_data = load_datasets(max_samples=2)\n", - "print(train_data[0])" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "asw-pJrid8ly", - "outputId": "31807c34-0de9-45e5-ebdd-778aa5313802" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Example(id='d3f33ded-170a-4b87-9b0b-987d5fb7b817', question='I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?', answer='4')\n" - ] - } - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" + ] }, { - "cell_type": "markdown", - "source": [ - "### Soft link to AdalFlow default file path\n", - "\n", - "Lets' match the default to the current project, so that you can see the downloaded data and later the checkpoints of the training." - ], - "metadata": { - "id": "VAVtXE9xeEHt" - } - }, - { - "cell_type": "code", - "source": [ - "! ln -s /root/.adalflow /content/adalflow\n", - "\n", - "# go to files then you will see a folder named as adalflow" - ], - "metadata": { - "id": "1SaKH6dkeWus" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# 😊 AdalComponent to define everything we need to train\n", - "\n", - "1. We need `backward_engine_model_config`` for ``backward_engine`` to compute gradient.\n", - "\n", - "2. We need ``text_optimizer_model_config`` for the `text optimizer` for propose new prompts.\n", - "\n", - "3. For the demo optimizer, we need a `teacher_model_config` to config a teacher generator, in this case, it is the `llm_counter`. The teacher will share the same prompt with the `llm_counter` but you can use a more advanced model.\n", - "\n", - "In general, we should have all of these parts to use a more advanced model." - ], - "metadata": { - "id": "YWZzOvAHenME" - } - }, - { - "cell_type": "markdown", - "source": [ - "## πŸ§‘ Diagnose\n", - "\n", - "Diagnose is more of an evaluation, but with detailed logs so that you can manually inspect the wrong output.\n", - "\n", - "This one shows the minimum config you need to get the `diagnose` work." 
- ], - "metadata": { - "id": "9QoNoMWD0rgV" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.datasets.types import Example\n", - "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n", - "\n", - "\n", - "class ObjectCountAdalComponent(adal.AdalComponent):\n", - " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", - " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n", - " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", - " super().__init__(task=task, eval_fn=eval_fn)\n", - "\n", - " def prepare_task(self, sample: Example):\n", - " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", - "\n", - " def prepare_eval(\n", - " self, sample: Example, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", - " y_label = -1\n", - " if y_pred and y_pred.data:\n", - " y_label = y_pred.data\n", - " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}" - ], - "metadata": { - "id": "6mi7lM3U24Eg" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def diagnose(\n", - " model_client: adal.ModelClient,\n", - " model_kwargs: Dict,\n", - ") -> Dict:\n", - "\n", - " trainset, valset, testset = load_datasets()\n", - " # use max_samples=10 to test the code\n", - "\n", - " adal_component = ObjectCountAdalComponent(model_client, model_kwargs)\n", - " trainer = adal.Trainer(adaltask=adal_component)\n", - " trainer.diagnose(dataset=trainset, split=\"train\")\n", - " trainer.diagnose(dataset=valset, split=\"val\")\n", - " trainer.diagnose(dataset=testset, split=\"test\")" - ], - "metadata": { - "id": "eliPeVeM2wcP" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "diagnose(**gpt_3_model)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nKl9clcb3dFj", - "outputId": "676fbb96-c70b-40ab-ea15-93ade1aa9e66" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", - "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n", - "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n", - "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n", - "all_generators: [('llm_counter', Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "))]\n", - "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/llm_counter_call.jsonl\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 5957.82it/s]\n", - "Evaluating step(0): 0.88 across 50 samples, Max potential: 0.88: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:15<00:00, 3.27it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "sorted_indices: [8, 16, 23, 25, 31, 47, 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49]\n", - "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n", - "Loading log file: llm_counter_call.jsonl\n", - "Total error samples: 6\n", - "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val\n", - "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val\n", - "all_generators: [('llm_counter', Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "))]\n", - "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val/llm_counter_call.jsonl\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 3203.76it/s]\n", - "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:15<00:00, 3.26it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "sorted_indices: [1, 2, 5, 10, 24, 36, 38, 42, 44, 47, 0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 40, 41, 43, 45, 46, 48, 49]\n", - "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n", - "Loading log file: llm_counter_call.jsonl\n", - "Total error samples: 10\n", - "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test\n", - "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test\n", - "all_generators: [('llm_counter', Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "))]\n", - "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test/llm_counter_call.jsonl\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 5545.09it/s]\n", - "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:28<00:00, 3.50it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "sorted_indices: [7, 18, 19, 20, 23, 24, 25, 43, 58, 59, 63, 74, 75, 79, 85, 97, 99, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 98]\n", - "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n", - "Loading log file: llm_counter_call.jsonl\n", - "Total error samples: 17\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", + "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n", + "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n", + "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n", + "all_generators: [('llm_counter', Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "))]\n", + "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/llm_counter_call.jsonl\n" + ] }, { - "cell_type": "markdown", - "source": [ - "Now, you can go to `/content/adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/stats.json` to view the average score for each split. And also the `diagnose.json` for different errors.\n", - "\n", - "Here is the overall score for each split.\n", - "\n", - "| Train | Val| Test |\n", - "|:--------- |:--------:| ---------:|\n", - "| 0.88 | 0.8 | 0.83 |\n", - "\n" - ], - "metadata": { - "id": "dSu4VQri3y3D" - } - }, - { - "cell_type": "markdown", - "source": [ - "## πŸ› Debug" - ], - "metadata": { - "id": "1vzJyp-W0z7I" - } - }, - { - "cell_type": "markdown", - "source": [ - "## βœ… Train\n", - "\n", - "Now, let's start training." - ], - "metadata": { - "id": "TmlCvJu804dJ" - } - }, - { - "cell_type": "code", - "source": [ - "from adalflow.datasets.types import Example\n", - "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n", - "\n", - "\n", - "class ObjectCountAdalComponent(adal.AdalComponent):\n", - " def __init__(\n", - " self,\n", - " model_client: adal.ModelClient,\n", - " model_kwargs: Dict,\n", - " backward_engine_model_config: Dict,\n", - " teacher_model_config: Dict,\n", - " text_optimizer_model_config: Dict,\n", - " ):\n", - " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n", - " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", - " loss_fn = adal.EvalFnToTextLoss(\n", - " eval_fn=eval_fn,\n", - " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n", - " )\n", - " super().__init__(task=task, eval_fn=eval_fn, loss_fn=loss_fn)\n", - "\n", - " self.backward_engine_model_config = backward_engine_model_config\n", - " self.teacher_model_config = teacher_model_config\n", - " self.text_optimizer_model_config = text_optimizer_model_config\n", - "\n", - " def prepare_task(self, sample: Example):\n", - " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", - "\n", - " def prepare_eval(\n", - " self, sample: Example, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", - " y_label = -1\n", - " if y_pred and y_pred.data:\n", - " y_label = y_pred.data\n", - " return self.eval_fn(y=y_label, y_gt=sample.answer)\n", - "\n", - " def prepare_loss(self, sample: Example, pred: adal.Parameter):\n", - " # prepare gt parameter\n", - " y_gt = adal.Parameter(\n", - " name=\"y_gt\",\n", - " data=sample.answer,\n", - " eval_input=sample.answer,\n", - " requires_opt=False,\n", - " )\n", - "\n", - " # pred's full_response is the output of the task pipeline which is GeneratorOutput\n", - " pred.eval_input = pred.full_response.data\n", - " return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}" - ], - "metadata": { - "id": "4TWCn0did6-K" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def train(\n", - " train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle\n", - " raw_shots: int = 0,\n", - " bootstrap_shots: int = 1,\n", - " max_steps=1,\n", - " num_workers=4,\n", - " 
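Note that `prepare_eval` above reduces each prediction to an exact string match via `AnswerMatchAcc`. A quick sanity check of that metric in isolation (same import as in the cell above; per its description, it scores 1 if `str(y) == str(y_gt)` else 0):

```python
from adalflow.eval.answer_match_acc import AnswerMatchAcc

eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item

print(eval_fn(y=6, y_gt="6"))  # expected: 1 (compared as strings)
print(eval_fn(y=5, y_gt="6"))  # expected: 0
```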
strategy=\"random\",\n", - " optimization_order=\"sequential\",\n", - " debug=False,\n", - " resume_from_ckpt=None,\n", - " exclude_input_fields_from_bootstrap_demos=False,\n", - "):\n", - " adal_component = ObjectCountAdalComponent(\n", - " **gpt_3_model,\n", - " teacher_model_config=gpt_4o_model,\n", - " text_optimizer_model_config=gpt_4o_model,\n", - " backward_engine_model_config=gpt_4o_model\n", - " )\n", - " print(adal_component)\n", - " trainer = adal.Trainer(\n", - " train_batch_size=train_batch_size,\n", - " adaltask=adal_component,\n", - " strategy=strategy,\n", - " max_steps=max_steps,\n", - " num_workers=num_workers,\n", - " raw_shots=raw_shots,\n", - " bootstrap_shots=bootstrap_shots,\n", - " debug=debug,\n", - " weighted_sampling=True,\n", - " optimization_order=optimization_order,\n", - " exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,\n", - " )\n", - " print(trainer)\n", - "\n", - " train_dataset, val_dataset, test_dataset = load_datasets()\n", - " trainer.fit(\n", - " train_dataset=train_dataset,\n", - " val_dataset=val_dataset,\n", - " test_dataset=test_dataset,\n", - " debug=debug,\n", - " resume_from_ckpt=resume_from_ckpt,\n", - " )\n" - ], - "metadata": { - "id": "dezwX2yn1eQS" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "We use `Sequential` in default, we will end up with 24 steps in total, 12 for text optimizer and 12 for the demo optimizer." - ], - "metadata": { - "id": "NGKYozGt60Pp" - } - }, - { - "cell_type": "code", - "source": [ - "train(debug=False, max_steps=12, strategy=\"constrained\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " exclude_input_fields_from_bootstrap_demos=True\n", - " )" - ], - "metadata": { - "id": "yDwLwL0L7Rsw", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "1b7e413b-a1d3-4388-fc0c-ca4b1c072585" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", - "ObjectCountAdalComponent(\n", - " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", - " (task): ObjectCountTaskPipeline(\n", - " (llm_counter): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You 
will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - " )\n", - " (loss_fn): EvalFnToTextLoss()\n", - ")\n", - "Trainer(\n", - " (adaltask): ObjectCountAdalComponent(\n", - " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", - " (task): ObjectCountTaskPipeline(\n", - " (llm_counter): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - " )\n", - " (loss_fn): EvalFnToTextLoss()\n", - " )\n", - ")\n", - "raw_shots: 0, bootstrap_shots: 1\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator configured.\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Backward engine configured for all generators.\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 6482.70it/s]\n", - "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 347.01it/s]\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 2017.67it/s]\n", - "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 286.59it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Initial validation score: 0.8\n", - "Initial test score: 0.83\n", - "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n", - "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 1: 0%| | 0/13 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 445.28it/s]\n", - "Evaluating step(4): 1.0 across 4 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.67it/s]\n", - "Proposing: 0%| | 0/5 [00:03= 0.75\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1139.66it/s]\n", - "Evaluating step(5): 0.84 across 50 samples, Max potential: 0.84: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:16<00:00, 3.04it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer step: 0.84 > 0.8\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 1658.72it/s]\n", - "Evaluating step(4): 0.91 across 100 samples, Max potential: 0.91: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:29<00:00, 3.37it/s]\n", - "Training Step: 6: 38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:56<02:18, 17.27s/it]\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 207.97it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.86it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 494.99it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 805.09it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 3\n", - "Moving batch error size: 1\n", - "Moving batch acc: 0.75\n", - "Moving batch correct size: 3\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name 
Generator_outputy_pred_1 score to 0.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "Subset loss backward time: 4.081957817077637\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 279.47it/s]\n", - "Evaluating step(6): 0.875 across 8 samples, Max potential: 0.875: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:01<00:00, 4.43it/s]\n", - "Proposing: 0%| | 0/5 [00:04= 0.875\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2336.58it/s]\n", - "Evaluating step(7): 0.84 across 50 samples, Max potential: 0.84: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:17<00:00, 2.88it/s]\n", - "Training Step: 8: 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [01:37<01:58, 19.81s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.84 <= 0.84\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 148.75it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.04it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 345.11it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 7550.50it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 11\n", - "Moving batch error size: 1\n", - "Moving batch acc: 0.9166666666666666\n", - "Moving batch correct size: 11\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "Subset loss backward time: 2.337067127227783\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 16/16 [00:00<00:00, 481.75it/s]\n", - "Evaluating step(8): 0.875 across 16 samples, Max potential: 0.875: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 16/16 [00:03<00:00, 5.21it/s]\n", - "Proposing: 0%| | 0/5 [00:06= 0.875\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1112.82it/s]\n", - "Evaluating step(9): 0.86 across 50 samples, Max potential: 0.86: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:16<00:00, 2.97it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer step: 0.86 > 0.84\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 2395.58it/s]\n", - "Evaluating step(8): 0.87 across 100 samples, Max potential: 0.87: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:30<00:00, 3.30it/s]\n", - "Training Step: 10: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [02:52<02:04, 31.23s/it]\n", - "Loading 
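The `constrained` strategy in these logs gates every proposal twice: a cheap check on the error subset of the moving batch, then a check on the full moving batch, before the winner is re-scored on the validation set. A paraphrased sketch of that control flow (my reading of the logs, not AdalFlow's actual implementation):

```python
def constrained_step(propose, subset_acc, batch_acc,
                     subset_baseline, batch_baseline, max_proposals=5):
    """Accept the first proposal that beats both baselines, else give up."""
    for _ in range(max_proposals):
        candidate = propose()
        if subset_acc(candidate) <= subset_baseline:
            continue  # "Fail subset check, try next proposal"
        if batch_acc(candidate) >= batch_baseline:
            return candidate  # "Pass full check ... Done with proposals"
    return None  # "No proposal can improve the subset and full set, go to next step"
```

A candidate that survives both checks is still reverted if it does not beat the best validation score so far, as in the "Optimizer revert: 0.84 <= 0.84" line above.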
Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 212.83it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.04it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 655.18it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1241.84it/s]\n", - "Training Step: 11: 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [02:55<01:07, 22.43s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Skipping batch 9 as acc: 1.0\n", - "No proposal can improve the subset and full set, go to next step\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 93.95it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.23it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 757.71it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1320.62it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 3\n", - "Moving batch error size: 1\n", - "Moving batch acc: 0.75\n", - "Moving batch correct size: 3\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 0.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "Subset loss backward time: 3.768970012664795\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 455.77it/s]\n", - "Evaluating step(10): 1.0 across 4 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 5.14it/s]\n", - "Proposing: 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:06<00:09, 3.17s/it]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Pass full check: 1.0 >= 0.75\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1732.93it/s]\n", - "Evaluating step(11): 0.825 across 40 samples, Max potential: 0.86: 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:18<00:04, 2.21it/s]\n", - "Training Step: 12: 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [03:24<00:49, 24.61s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.8048780487804879 <= 0.86\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 128.86it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.24it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 470.20it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 2608.40it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 6\n", - "Moving batch error size: 2\n", - "Moving batch acc: 0.75\n", - "Moving batch correct size: 6\n", - "Moving batch 
error size: 2\n", - "Subset Error size: 2\n", - "Subset Correct size: 4\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 0.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "Subset loss backward time: 6.722561836242676\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00\n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. 
Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - ")\n", - "Teacher generator configured.\n", - "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n", - "Starting step: 12\n", - "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 13: 0%| | 0/12 [00:00\n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - " )\n", - " (loss_fn): EvalFnToTextLoss()\n", - ")\n", - "Trainer(\n", - " (adaltask): ObjectCountAdalComponent(\n", - " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", - " (task): ObjectCountTaskPipeline(\n", - " (llm_counter): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - " )\n", - " (loss_fn): EvalFnToTextLoss()\n", - " )\n", - ")\n", - "raw_shots: 0, bootstrap_shots: 1\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator configured.\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Backward engine configured for all generators.\n", - "Restoring prompts: PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True)\n", - "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 27: 0%| | 0/13 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 263.51it/s]\n", - "Evaluating step(2): 1.0 across 4 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 4.20it/s]\n", - "Proposing: 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:10<00:15, 5.11s/it]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Pass full check: 1.0 >= 0.75\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2287.37it/s]\n", - "Evaluating step(29): 0.8158 across 38 samples, Max potential: 0.86: 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:17<00:05, 2.17it/s]\n", - "Training Step: 30: 23%|β–ˆβ–ˆβ–Ž | 3/13 [00:35<02:25, 14.59s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.7948717948717948 <= 0.86\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 268.93it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.69it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 603.76it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 8825.47it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 7\n", - "Moving batch error size: 1\n", - "Moving batch acc: 0.875\n", - "Moving batch correct size: 7\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "Subset loss backward time: 2.2182435989379883\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 281.73it/s]\n", - "Evaluating step(3): 1.0 across 8 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 
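The "Restoring prompts: PromptData(...)" line above comes from resuming a previous run: `resume_from_ckpt` reloads the best prompts from the saved checkpoint before training continues where it left off. With the `train()` helper defined earlier, that looks like the following (checkpoint path as printed in the logs above):

```python
ckpt = "/root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json"

train(
    debug=False,
    max_steps=12,
    strategy="constrained",
    raw_shots=0,
    bootstrap_shots=1,
    exclude_input_fields_from_bootstrap_demos=True,
    resume_from_ckpt=ckpt,
)
```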
[00:02<00:00, 2.96it/s]\n", - "Proposing: 20%|β–ˆβ–ˆ | 1/5 [00:08<00:34, 8.54s/it]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Pass full check: 1.0 >= 0.875\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1910.10it/s]\n", - "Evaluating step(30): 0.72 across 25 samples, Max potential: 0.86: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:18<00:18, 1.38it/s]\n", - "Training Step: 31: 31%|β–ˆβ–ˆβ–ˆ | 4/13 [01:05<03:03, 20.39s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.6923076923076923 <= 0.86\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 310.31it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.75it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 454.32it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 12336.19it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 11\n", - "Moving batch error size: 1\n", - "Moving batch acc: 0.9166666666666666\n", - "Moving batch correct size: 11\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "Subset loss backward time: 2.028568983078003\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 12/12 [00:00<00:00, 724.90it/s]\n", - "Evaluating step(4): 1.0 across 12 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 12/12 [00:03<00:00, 3.66it/s]\n", - "Proposing: 0%| | 0/5 [00:05= 0.9166666666666666\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2233.56it/s]\n", - "Evaluating step(31): 0.8511 across 47 samples, Max potential: 0.86: 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:16<00:01, 2.81it/s]\n", - "Training Step: 32: 38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [01:31<02:58, 22.30s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.8333333333333334 <= 0.86\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 269.31it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.20it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 606.49it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1212.58it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 15\n", - "Moving batch error size: 1\n", - "Moving batch acc: 0.9375\n", - "Moving batch correct size: 15\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch 
acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "Subset loss backward time: 3.2150633335113525\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00\n", - "500 Internal Server Error\n", - "\n", - "

<center><h1>500 Internal Server Error</h1></center>\n", - "<hr><center>nginx</center>
\n", - "\n", - ")\n", - "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:50<00:00, 16.89s/it]\n", - "\n", - "Proposing: 20%|β–ˆβ–ˆ | 1/5 [00:52<03:28, 52.11s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", - "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 645.05it/s]\n", - "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 298.94it/s]\n", - "\n", - "Proposing: 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:53<01:07, 22.46s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", - "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 751.40it/s]\n", - "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 360.88it/s]\n", - "\n", - "Proposing: 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:54<00:25, 12.66s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", - "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 332.13it/s]\n", - "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 276.08it/s]\n", - "\n", - "Proposing: 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:55<00:08, 8.12s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", - "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 440.13it/s]\n", - "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 235.96it/s]\n", - "\n", - "Proposing: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5/5 [00:57<00:00, 11.41s/it]\n", - "Training Step: 33: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [02:33<04:07, 35.35s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", - "Done with proposals\n", - "No proposal can improve the subset and full set, go to next step\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 317.05it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 676.47it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 543.36it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1518.44it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 18\n", - "Moving batch error size: 2\n", - "Moving batch acc: 0.9\n", - "Moving batch correct size: 18\n", - "Moving batch error size: 2\n", - "Subset Error size: 2\n", - "Subset Correct size: 4\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 0.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "Subset loss backward time: 7.857504606246948\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - 
"Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:00<00:00, 649.93it/s]\n", - "Evaluating step(8): 0.95 across 20 samples, Max potential: 0.95: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:02<00:00, 8.93it/s]\n", - "Proposing: 20%|β–ˆβ–ˆ | 1/5 [00:08<00:35, 8.79s/it]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Pass full check: 0.95 >= 0.9\n", - "Done with proposals\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2667.62it/s]\n", - "Evaluating step(35): 0.8511 across 47 samples, Max potential: 0.86: 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:00<00:00, 559.52it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.8333333333333334 <= 0.86\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 36: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [03:21<01:29, 22.39s/it]\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 154.85it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.33it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 610.06it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1798.78it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Moving batch correct size: 22\n", - "Moving batch error size: 2\n", - "Moving batch acc: 0.95\n", - "Moving batch correct size: 19\n", - "Moving batch error size: 1\n", - "Subset Error size: 1\n", - "Subset Correct size: 2\n", - "Subset score: 0.6666666666666666\n", - "Subset batch acc: 0.6666666666666666\n", - "Subset loss backward...\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "Subset loss backward time: 2.553833246231079\n", - "Optimizer propose...\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Proposing: 0%| | 0/5 [00:00\n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. 
Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - ")\n", - "Teacher generator configured.\n", - "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n", - "Starting step: 38\n", - "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 39: 0%| | 0/12 [00:00\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "))]\n", + "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val/llm_counter_call.jsonl\n" + ] }, { - "cell_type": "markdown", - "source": [ - "I decide to try more, this time, using strategy \"random\". And in the bootstrap demo, there is one shot, but I ensure I also add the \"input\" in the demonstration." 
- ], - "metadata": { - "id": "m5fZGQqLE78r" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "train(debug=False, max_steps=12, strategy=\"random\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " resume_from_ckpt=ckpt_path,\n", - " exclude_input_fields_from_bootstrap_demos=False)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "78JAv4ULEn07", - "outputId": "e87bb360-fc26-4dbd-d163-86ab32c292df" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", - "ObjectCountAdalComponent(\n", - " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", - " (task): ObjectCountTaskPipeline(\n", - " (llm_counter): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - " )\n", - " (loss_fn): EvalFnToTextLoss()\n", - ")\n", - "Trainer(\n", - " (adaltask): ObjectCountAdalComponent(\n", - " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", - " (task): ObjectCountTaskPipeline(\n", - " (llm_counter): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - " )\n", - " )\n", - " (loss_fn): EvalFnToTextLoss()\n", - " )\n", - ")\n", - "raw_shots: 0, bootstrap_shots: 1\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator configured.\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Backward engine configured for all generators.\n", - "Restoring prompts: PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True)\n", - "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 51: 0%| | 0/13 [00:00 0.86\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 5848.08it/s]\n", - "Evaluating step(53): 0.9 across 100 samples, Max potential: 0.9: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:30<00:00, 3.32it/s]\n", - "Training Step: 54: 23%|β–ˆβ–ˆβ–Ž | 3/13 [01:28<05:35, 33.51s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 297.78it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.95it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 407.40it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 8952.62it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1274.72it/s]\n", - "Evaluating step(54): 0.94 across 50 samples, Max potential: 0.94: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:16<00:00, 3.06it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer step: 0.94 > 0.88\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 6831.78it/s]\n", - "Evaluating step(54): 0.91 across 100 samples, Max potential: 0.91: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:30<00:00, 3.33it/s]\n", - "Training Step: 55: 31%|β–ˆβ–ˆβ–ˆ | 4/13 [02:21<06:10, 41.21s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 152.84it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:03<00:00, 1.28it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 688.86it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1318.45it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 0.0\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data='You will answer a reasoning question. Carefully count each item and verify your total. List each item individually, ensuring each is counted as \"1\" regardless of quantity mentioned. Show your calculations step by step. 
The last line of your response should be: \\'Answer: $VALUE\\' where VALUE is a numerical value.', requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2011.16it/s]\n", - "Evaluating step(55): 0.8696 across 23 samples, Max potential: 0.94: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:15<00:17, 1.52it/s]\n", - "Training Step: 56: 38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [02:46<04:43, 35.43s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.8333333333333334 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 93.66it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:02<00:00, 1.75it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 646.55it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 2217.45it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 4572.35it/s]\n", - "Evaluating step(56): 0.94 across 50 samples, Max potential: 0.94: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 390.77it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.94 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Training Step: 57: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [02:54<03:02, 26.03s/it]\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 145.48it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:02<00:00, 1.52it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 375.76it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1437.76it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 0.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check for any grouped items and count them correctly. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1614.47it/s]\n", - "Evaluating step(57): 0.7857 across 14 samples, Max potential: 0.94: 28%|β–ˆβ–ˆβ–Š | 14/50 [00:19<00:50, 1.41s/it]\n", - "Training Step: 58: 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [03:23<02:42, 27.04s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.7333333333333333 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 137.96it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.94it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 806.79it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 11522.81it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be formatted as: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 3560.17it/s]\n", - "Evaluating step(58): 0.88 across 25 samples, Max potential: 0.94: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:17<00:17, 1.45it/s]\n", - "Training Step: 59: 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [03:47<02:10, 26.06s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.8461538461538461 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 93.90it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.70it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 552.01it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 5648.89it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 0.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1770.11it/s]\n", - "Evaluating step(59): 0.9286 across 42 samples, Max potential: 0.94: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:16<00:03, 2.49it/s]\n", - "Training Step: 60: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [04:13<01:43, 26.00s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.9069767441860465 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 314.86it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.10it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 722.53it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 7940.00it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count for precision. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 7188.43it/s]\n", - "Evaluating step(60): 0.8966 across 29 samples, Max potential: 0.94: 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:15<00:11, 1.84it/s]\n", - "Training Step: 61: 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [04:35<01:14, 24.87s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.8666666666666667 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 95.68it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.74it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 587.05it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 12520.31it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 3036.62it/s]\n", - "Evaluating step(61): 0.9286 across 42 samples, Max potential: 0.94: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:00<00:00, 327.89it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.9069767441860465 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Training Step: 62: 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [04:44<00:40, 20.14s/it]\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 136.40it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.17it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 417.11it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 14339.50it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Loss backward...\n", - "setting pred name Generator_outputy_pred_2 score to 1.0\n", - "setting pred name Generator_outputy_pred_0 score to 1.0\n", - "setting pred name Generator_outputy_pred_3 score to 1.0\n", - "setting pred name Generator_outputy_pred_1 score to 1.0\n", - "Optimizer propose...\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 5035.06it/s]\n", - "Evaluating step(62): 0.9286 across 42 samples, Max potential: 0.94: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:00<00:00, 327.19it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Optimizer revert: 0.9069767441860465 <= 0.94\n", - "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Training Step: 62: 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [04:51<00:24, 24.28s/it]\n", - "Epoch: 0%| | 0/1 [04:51\n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " {{system_prompt}}\n", - " {# Few shot demos #}\n", - " {% if few_shot_demos is not none %}\n", - " Here are some examples:\n", - " {{few_shot_demos}}\n", - " {% endif %}\n", - " \n", - " \n", - " {{input_str}}\n", - " \n", - " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", - " )\n", - " (model_client): OpenAIClient()\n", - " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", - ")\n", - "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", - "Configuring teacher generator for Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. 
Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - ")\n", - "Teacher generator set: Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - "), teacher Generator(\n", - " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", - " (prompt): Prompt(\n", - " template: \n", - " You are the feedback engine in an optimization system.\n", - " \n", - " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", - " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", - " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", - " 3. 
Consider the variable in the context of its peers if provided.\n", - " Remember:\n", - " Be concise, critical, and direct.\n", - " \n", - " \n", - " {{conversation_sec}}\n", - " \n", - " {{objective_instruction_sec}}\n", - " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", - " )\n", - " (model_client): OpenAIClient()\n", - ")\n", - "Teacher generator configured.\n", - "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n", - "Starting step: 62\n", - "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training Step: 63: 0%| | 0/12 [00:00 0.94\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 3294.35it/s]\n", - "Evaluating step(65): 0.95 across 100 samples, Max potential: 0.95: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:39<00:00, 2.51it/s]\n", - "Training Step: 66: 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [01:50<02:42, 23.20s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Training Step: 66\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 186.04it/s]\n", - "Training: 25%|β–ˆβ–ˆβ–Œ | 1/4 [00:00<00:02, 1.01it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trace with id fe9b883c-4f47-44f7-a388-b03a2fb10413 already exists. Updating the trace.\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\rTraining: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:01<00:01, 1.30it/s]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trace with id 12a6ff3d-f54d-4d89-b5f0-1aec30e96398 already exists. Updating the trace.\n", - "Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:02<00:00, 1.46it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 636.54it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 9420.11it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 111.34it/s]\n", - "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.50it/s]\n", - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 321.28it/s]\n", - "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 731.61it/s]\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n", - "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. 
Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1120.89it/s]\n", - "Evaluating step(66): 0.96 across 50 samples, Max potential: 0.96: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 212.00it/s]\n", - "Training Step: 67: 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [01:55<01:32, 18.51s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Fail validation: 0.96 <= 0.96, revert\n", - "Training Step: 67\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "Loading Data: 0%| | 0/4 [00:00\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "))]\n", + "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test/llm_counter_call.jsonl\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 5545.09it/s]\n", + "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:28<00:00, 3.50it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sorted_indices: [7, 18, 19, 20, 23, 24, 25, 43, 58, 59, 63, 74, 75, 79, 85, 97, 99, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 98]\n", + "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n", + "Loading log file: llm_counter_call.jsonl\n", + "Total error samples: 17\n" + ] }, { - "cell_type": "markdown", - "source": [ - "Finally, we got 96% on the val and 95% on the test!!! This is really close to GPT4o's performance. This took us 72 steps!\n", - "\n", - "The score is consistent, meaning this is a good prompt.\n", - "Here is our final optimized prompt:\n", - "\n", - "System:\n", - "\n", - "```\n", - "\n", - "\"prompt\": [\n", - " {\n", - " \"id\": \"327b63f0-b532-435a-85d7-6137d4e52c4c\",\n", - " \"name\": \"llm_counter.system_prompt\",\n", - " \"data\": \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\",\n", - " \"requires_opt\": true\n", - " },\n", - " {\n", - " \"id\": \"73a3953b-6351-44d8-a36f-7521db346cca\",\n", - " \"name\": \"llm_counter.few_shot_demos\",\n", - " \"data\": \"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. 
Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\",\n", - " \"requires_opt\": true\n", - " }\n", - " ]\n", - "```\n", - "\n", - "\n", - "You will see all steps record from the log." - ], - "metadata": { - "id": "xTB4lO3PFPnP" - } - }, - { - "cell_type": "markdown", - "source": [ - "Happy Optimizing!!!" - ], - "metadata": { - "id": "Fr0V3XNCHAis" - } - }, - { - "cell_type": "markdown", - "source": [ - "# Issues and feedback\n", - "\n", - "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", - "\n", - "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." - ], - "metadata": { - "id": "3Wnvqs3RyI_z" - } + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] } - ] + ], + "source": [ + "diagnose(**gpt_3_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dSu4VQri3y3D" + }, + "source": [ + "Now, you can go to `/content/adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/stats.json` to view the average score for each split. And also the `diagnose.json` for different errors.\n", + "\n", + "Here is the overall score for each split.\n", + "\n", + "| Train | Val| Test |\n", + "|:--------- |:--------:| ---------:|\n", + "| 0.88 | 0.8 | 0.83 |\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1vzJyp-W0z7I" + }, + "source": [ + "## πŸ› Debug" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TmlCvJu804dJ" + }, + "source": [ + "## βœ… Train\n", + "\n", + "Now, let's start training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4TWCn0did6-K" + }, + "outputs": [], + "source": [ + "from adalflow.datasets.types import Example\n", + "\n", + "\n", + "class ObjectCountAdalComponent(adal.AdalComponent):# noqa: F811\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " backward_engine_model_config: Dict,\n", + " teacher_model_config: Dict,\n", + " text_optimizer_model_config: Dict,\n", + " ):\n", + " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n", + " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", + " loss_fn = adal.EvalFnToTextLoss(\n", + " eval_fn=eval_fn,\n", + " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n", + " )\n", + " super().__init__(task=task, eval_fn=eval_fn, loss_fn=loss_fn)\n", + "\n", + " self.backward_engine_model_config = backward_engine_model_config\n", + " self.teacher_model_config = teacher_model_config\n", + " self.text_optimizer_model_config = text_optimizer_model_config\n", + "\n", + " def prepare_task(self, sample: Example):\n", + " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", + "\n", + "\n", + " def prepare_eval(\n", + " self, sample: Example, y_pred: adal.GeneratorOutput\n", + " ) -> float:\n", + " y_label = -1\n", + " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", + " y_label = y_pred.data\n", + " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n", + "\n", + " def prepare_loss(self, sample: Example, pred: adal.Parameter):\n", + " # prepare gt parameter\n", + " y_gt = adal.Parameter(\n", + " name=\"y_gt\",\n", + " data=sample.answer,\n", + " eval_input=sample.answer,\n", + " requires_opt=False,\n", + " )\n", + "\n", + " # pred's 
full_response is the output of the task pipeline, which is a GeneratorOutput\n",
+    "        pred.eval_input = pred.full_response.data\n",
+    "        return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "dezwX2yn1eQS"
+   },
+   "outputs": [],
+   "source": [
+    "def train(\n",
+    "    train_batch_size=4,  # a larger batch size is not much more effective, probably because of the LLM's lost-in-the-middle problem\n",
+    "    raw_shots: int = 0,\n",
+    "    bootstrap_shots: int = 1,\n",
+    "    max_steps=1,\n",
+    "    num_workers=4,\n",
+    "    strategy=\"random\",\n",
+    "    optimization_order=\"sequential\",\n",
+    "    debug=False,\n",
+    "    resume_from_ckpt=None,\n",
+    "    exclude_input_fields_from_bootstrap_demos=False,\n",
+    "):\n",
+    "    adal_component = ObjectCountAdalComponent(\n",
+    "        **gpt_3_model,\n",
+    "        teacher_model_config=gpt_4o_model,\n",
+    "        text_optimizer_model_config=gpt_4o_model,\n",
+    "        backward_engine_model_config=gpt_4o_model\n",
+    "    )\n",
+    "    print(adal_component)\n",
+    "    trainer = adal.Trainer(\n",
+    "        train_batch_size=train_batch_size,\n",
+    "        adaltask=adal_component,\n",
+    "        strategy=strategy,\n",
+    "        max_steps=max_steps,\n",
+    "        num_workers=num_workers,\n",
+    "        raw_shots=raw_shots,\n",
+    "        bootstrap_shots=bootstrap_shots,\n",
+    "        debug=debug,\n",
+    "        weighted_sampling=True,\n",
+    "        optimization_order=optimization_order,\n",
+    "        exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,\n",
+    "    )\n",
+    "    print(trainer)\n",
+    "\n",
+    "    train_dataset, val_dataset, test_dataset = load_datasets()\n",
+    "    trainer.fit(\n",
+    "        train_dataset=train_dataset,\n",
+    "        val_dataset=val_dataset,\n",
+    "        test_dataset=test_dataset,\n",
+    "        debug=debug,\n",
+    "        resume_from_ckpt=resume_from_ckpt,\n",
+    "    )\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "NGKYozGt60Pp"
+   },
+   "source": [
+    "We use the `sequential` optimization order by default, so we will end up with 24 steps in total: 12 for the text optimizer and 12 for the demo optimizer.\n",
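+    "\n",
+    "As a minimal sketch of a run, using the `train` helper defined above (the argument values below are illustrative, not required; you can also pass `resume_from_ckpt` to continue from a saved checkpoint):\n",
+    "\n",
+    "```python\n",
+    "# With max_steps=12 and sequential order: 12 text-optimizer steps, then 12 demo-optimizer steps.\n",
+    "train(debug=False, max_steps=12, strategy=\"constrained\")\n",
+    "```"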
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yDwLwL0L7Rsw", + "outputId": "1b7e413b-a1d3-4388-fc0c-ca4b1c072585" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n", + "ObjectCountAdalComponent(\n", + " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", + " (task): ObjectCountTaskPipeline(\n", + " (llm_counter): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + " )\n", + " (loss_fn): EvalFnToTextLoss()\n", + ")\n", + "Trainer(\n", + " (adaltask): ObjectCountAdalComponent(\n", + " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", + " (task): ObjectCountTaskPipeline(\n", + " (llm_counter): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + " )\n", + " (loss_fn): EvalFnToTextLoss()\n", + " )\n", + ")\n", + "raw_shots: 0, bootstrap_shots: 1\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator configured.\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Backward engine configured for all generators.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 6482.70it/s]\n", + "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 347.01it/s]\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 2017.67it/s]\n", + "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 286.59it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial validation score: 0.8\n", + "Initial test score: 0.83\n", + "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n", + "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 1: 0%| | 0/13 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 445.28it/s]\n", + "Evaluating step(4): 1.0 across 4 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.67it/s]\n", + "Proposing: 0%| | 0/5 [00:03= 0.75\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1139.66it/s]\n", + "Evaluating step(5): 0.84 across 50 samples, Max potential: 0.84: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:16<00:00, 3.04it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer step: 0.84 > 0.8\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 1658.72it/s]\n", + "Evaluating step(4): 0.91 across 100 samples, Max potential: 0.91: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:29<00:00, 3.37it/s]\n", + "Training Step: 6: 38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:56<02:18, 17.27s/it]\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 207.97it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.86it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 494.99it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 805.09it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 3\n", + "Moving batch error size: 1\n", + "Moving batch acc: 0.75\n", + "Moving batch correct size: 3\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name 
Generator_outputy_pred_1 score to 0.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "Subset loss backward time: 4.081957817077637\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 279.47it/s]\n", + "Evaluating step(6): 0.875 across 8 samples, Max potential: 0.875: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:01<00:00, 4.43it/s]\n", + "Proposing: 0%| | 0/5 [00:04= 0.875\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2336.58it/s]\n", + "Evaluating step(7): 0.84 across 50 samples, Max potential: 0.84: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:17<00:00, 2.88it/s]\n", + "Training Step: 8: 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [01:37<01:58, 19.81s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.84 <= 0.84\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 148.75it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.04it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 345.11it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 7550.50it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 11\n", + "Moving batch error size: 1\n", + "Moving batch acc: 0.9166666666666666\n", + "Moving batch correct size: 11\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "Subset loss backward time: 2.337067127227783\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 16/16 [00:00<00:00, 481.75it/s]\n", + "Evaluating step(8): 0.875 across 16 samples, Max potential: 0.875: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 16/16 [00:03<00:00, 5.21it/s]\n", + "Proposing: 0%| | 0/5 [00:06= 0.875\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1112.82it/s]\n", + "Evaluating step(9): 0.86 across 50 samples, Max potential: 0.86: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:16<00:00, 2.97it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer step: 0.86 > 0.84\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 2395.58it/s]\n", + "Evaluating step(8): 0.87 across 100 samples, Max potential: 0.87: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:30<00:00, 3.30it/s]\n", + "Training Step: 10: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [02:52<02:04, 31.23s/it]\n", + "Loading 
Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 212.83it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.04it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 655.18it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1241.84it/s]\n", + "Training Step: 11: 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [02:55<01:07, 22.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Skipping batch 9 as acc: 1.0\n", + "No proposal can improve the subset and full set, go to next step\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 93.95it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.23it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 757.71it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1320.62it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 3\n", + "Moving batch error size: 1\n", + "Moving batch acc: 0.75\n", + "Moving batch correct size: 3\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 0.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "Subset loss backward time: 3.768970012664795\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 455.77it/s]\n", + "Evaluating step(10): 1.0 across 4 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 5.14it/s]\n", + "Proposing: 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:06<00:09, 3.17s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pass full check: 1.0 >= 0.75\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1732.93it/s]\n", + "Evaluating step(11): 0.825 across 40 samples, Max potential: 0.86: 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:18<00:04, 2.21it/s]\n", + "Training Step: 12: 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [03:24<00:49, 24.61s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.8048780487804879 <= 0.86\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 128.86it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.24it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 470.20it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 2608.40it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 6\n", + "Moving batch error size: 2\n", + "Moving batch acc: 0.75\n", + "Moving batch correct size: 6\n", + "Moving batch 
error size: 2\n", + "Subset Error size: 2\n", + "Subset Correct size: 4\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 0.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "Subset loss backward time: 6.722561836242676\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. 
Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + ")\n", + "Teacher generator configured.\n", + "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n", + "Starting step: 12\n", + "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 13: 0%| | 0/12 [00:00\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + " )\n", + " (loss_fn): EvalFnToTextLoss()\n", + ")\n", + "Trainer(\n", + " (adaltask): ObjectCountAdalComponent(\n", + " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", + " (task): ObjectCountTaskPipeline(\n", + " (llm_counter): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + " )\n", + " (loss_fn): EvalFnToTextLoss()\n", + " )\n", + ")\n", + "raw_shots: 0, bootstrap_shots: 1\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator configured.\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Backward engine configured for all generators.\n", + "Restoring prompts: PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True)\n", + "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 27: 0%| | 0/13 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 263.51it/s]\n", + "Evaluating step(2): 1.0 across 4 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 4.20it/s]\n", + "Proposing: 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:10<00:15, 5.11s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pass full check: 1.0 >= 0.75\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2287.37it/s]\n", + "Evaluating step(29): 0.8158 across 38 samples, Max potential: 0.86: 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:17<00:05, 2.17it/s]\n", + "Training Step: 30: 23%|β–ˆβ–ˆβ–Ž | 3/13 [00:35<02:25, 14.59s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.7948717948717948 <= 0.86\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 268.93it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.69it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 603.76it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 8825.47it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 7\n", + "Moving batch error size: 1\n", + "Moving batch acc: 0.875\n", + "Moving batch correct size: 7\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "Subset loss backward time: 2.2182435989379883\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 281.73it/s]\n", + "Evaluating step(3): 1.0 across 8 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 
[00:02<00:00, 2.96it/s]\n", + "Proposing: 20%|β–ˆβ–ˆ | 1/5 [00:08<00:34, 8.54s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pass full check: 1.0 >= 0.875\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1910.10it/s]\n", + "Evaluating step(30): 0.72 across 25 samples, Max potential: 0.86: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:18<00:18, 1.38it/s]\n", + "Training Step: 31: 31%|β–ˆβ–ˆβ–ˆ | 4/13 [01:05<03:03, 20.39s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.6923076923076923 <= 0.86\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 310.31it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.75it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 454.32it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 12336.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 11\n", + "Moving batch error size: 1\n", + "Moving batch acc: 0.9166666666666666\n", + "Moving batch correct size: 11\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "Subset loss backward time: 2.028568983078003\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 12/12 [00:00<00:00, 724.90it/s]\n", + "Evaluating step(4): 1.0 across 12 samples, Max potential: 1.0: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 12/12 [00:03<00:00, 3.66it/s]\n", + "Proposing: 0%| | 0/5 [00:05= 0.9166666666666666\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2233.56it/s]\n", + "Evaluating step(31): 0.8511 across 47 samples, Max potential: 0.86: 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:16<00:01, 2.81it/s]\n", + "Training Step: 32: 38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [01:31<02:58, 22.30s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.8333333333333334 <= 0.86\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 269.31it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.20it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 606.49it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1212.58it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 15\n", + "Moving batch error size: 1\n", + "Moving batch acc: 0.9375\n", + "Moving batch correct size: 15\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch 
acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "Subset loss backward time: 3.2150633335113525\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00\n", + "500 Internal Server Error\n", + "\n", + "

<center><h1>500 Internal Server Error</h1></center>\n", + "<hr><center>nginx</center>\n", + "</body></html>
\n", + "\n", + ")\n", + "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:50<00:00, 16.89s/it]\n", + "\n", + "Proposing: 20%|β–ˆβ–ˆ | 1/5 [00:52<03:28, 52.11s/it]\u001b[A" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", + "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 645.05it/s]\n", + "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 298.94it/s]\n", + "\n", + "Proposing: 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:53<01:07, 22.46s/it]\u001b[A" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", + "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 751.40it/s]\n", + "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 360.88it/s]\n", + "\n", + "Proposing: 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:54<00:25, 12.66s/it]\u001b[A" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", + "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 332.13it/s]\n", + "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 276.08it/s]\n", + "\n", + "Proposing: 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:55<00:08, 8.12s/it]\u001b[A" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", + "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 440.13it/s]\n", + "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 235.96it/s]\n", + "\n", + "Proposing: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5/5 [00:57<00:00, 11.41s/it]\n", + "Training Step: 33: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [02:33<04:07, 35.35s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n", + "Done with proposals\n", + "No proposal can improve the subset and full set, go to next step\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 317.05it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 676.47it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 543.36it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1518.44it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 18\n", + "Moving batch error size: 2\n", + "Moving batch acc: 0.9\n", + "Moving batch correct size: 18\n", + "Moving batch error size: 2\n", + "Subset Error size: 2\n", + "Subset Correct size: 4\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 0.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "Subset loss backward time: 7.857504606246948\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + 
"Proposing: 0%| | 0/5 [00:00 0.6666666666666666\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:00<00:00, 649.93it/s]\n", + "Evaluating step(8): 0.95 across 20 samples, Max potential: 0.95: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:02<00:00, 8.93it/s]\n", + "Proposing: 20%|β–ˆβ–ˆ | 1/5 [00:08<00:35, 8.79s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pass full check: 0.95 >= 0.9\n", + "Done with proposals\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2667.62it/s]\n", + "Evaluating step(35): 0.8511 across 47 samples, Max potential: 0.86: 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:00<00:00, 559.52it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.8333333333333334 <= 0.86\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 36: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [03:21<01:29, 22.39s/it]\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 154.85it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.33it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 610.06it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1798.78it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moving batch correct size: 22\n", + "Moving batch error size: 2\n", + "Moving batch acc: 0.95\n", + "Moving batch correct size: 19\n", + "Moving batch error size: 1\n", + "Subset Error size: 1\n", + "Subset Correct size: 2\n", + "Subset score: 0.6666666666666666\n", + "Subset batch acc: 0.6666666666666666\n", + "Subset loss backward...\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "Subset loss backward time: 2.553833246231079\n", + "Optimizer propose...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Proposing: 0%| | 0/5 [00:00\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. 
Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + ")\n", + "Teacher generator configured.\n", + "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n", + "Starting step: 38\n", + "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 39: 0%| | 0/12 [00:00\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + " )\n", + " (loss_fn): EvalFnToTextLoss()\n", + ")\n", + "Trainer(\n", + " (adaltask): ObjectCountAdalComponent(\n", + " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n", + " (task): ObjectCountTaskPipeline(\n", + " (llm_counter): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + " )\n", + " )\n", + " (loss_fn): EvalFnToTextLoss()\n", + " )\n", + ")\n", + "raw_shots: 0, bootstrap_shots: 1\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator configured.\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Backward engine configured for all generators.\n", + "Restoring prompts: PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True)\n", + "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 51: 0%| | 0/13 [00:00 0.86\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 5848.08it/s]\n", + "Evaluating step(53): 0.9 across 100 samples, Max potential: 0.9: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:30<00:00, 3.32it/s]\n", + "Training Step: 54: 23%|β–ˆβ–ˆβ–Ž | 3/13 [01:28<05:35, 33.51s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 297.78it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.95it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 407.40it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 8952.62it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1274.72it/s]\n", + "Evaluating step(54): 0.94 across 50 samples, Max potential: 0.94: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:16<00:00, 3.06it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer step: 0.94 > 0.88\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 6831.78it/s]\n", + "Evaluating step(54): 0.91 across 100 samples, Max potential: 0.91: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:30<00:00, 3.33it/s]\n", + "Training Step: 55: 31%|β–ˆβ–ˆβ–ˆ | 4/13 [02:21<06:10, 41.21s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 152.84it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:03<00:00, 1.28it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 688.86it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1318.45it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 0.0\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data='You will answer a reasoning question. Carefully count each item and verify your total. List each item individually, ensuring each is counted as \"1\" regardless of quantity mentioned. Show your calculations step by step. 
The last line of your response should be: \\'Answer: $VALUE\\' where VALUE is a numerical value.', requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 2011.16it/s]\n", + "Evaluating step(55): 0.8696 across 23 samples, Max potential: 0.94: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:15<00:17, 1.52it/s]\n", + "Training Step: 56: 38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [02:46<04:43, 35.43s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.8333333333333334 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 93.66it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:02<00:00, 1.75it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 646.55it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 2217.45it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 4572.35it/s]\n", + "Evaluating step(56): 0.94 across 50 samples, Max potential: 0.94: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 390.77it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.94 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Training Step: 57: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [02:54<03:02, 26.03s/it]\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 145.48it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:02<00:00, 1.52it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 375.76it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 1437.76it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 0.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check for any grouped items and count them correctly. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1614.47it/s]\n", + "Evaluating step(57): 0.7857 across 14 samples, Max potential: 0.94: 28%|β–ˆβ–ˆβ–Š | 14/50 [00:19<00:50, 1.41s/it]\n", + "Training Step: 58: 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [03:23<02:42, 27.04s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.7333333333333333 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 137.96it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.94it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 806.79it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 11522.81it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be formatted as: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 3560.17it/s]\n", + "Evaluating step(58): 0.88 across 25 samples, Max potential: 0.94: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:17<00:17, 1.45it/s]\n", + "Training Step: 59: 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [03:47<02:10, 26.06s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.8461538461538461 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 93.90it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.70it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 552.01it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 5648.89it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 0.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1770.11it/s]\n", + "Evaluating step(59): 0.9286 across 42 samples, Max potential: 0.94: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:16<00:03, 2.49it/s]\n", + "Training Step: 60: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [04:13<01:43, 26.00s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.9069767441860465 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 314.86it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.10it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 722.53it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 7940.00it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count for precision. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 7188.43it/s]\n", + "Evaluating step(60): 0.8966 across 29 samples, Max potential: 0.94: 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:15<00:11, 1.84it/s]\n", + "Training Step: 61: 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [04:35<01:14, 24.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.8666666666666667 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 95.68it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.74it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 587.05it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 12520.31it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 3036.62it/s]\n", + "Evaluating step(61): 0.9286 across 42 samples, Max potential: 0.94: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:00<00:00, 327.89it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.9069767441860465 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Training Step: 62: 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [04:44<00:40, 20.14s/it]\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 136.40it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 3.17it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 417.11it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 14339.50it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss backward...\n", + "setting pred name Generator_outputy_pred_2 score to 1.0\n", + "setting pred name Generator_outputy_pred_0 score to 1.0\n", + "setting pred name Generator_outputy_pred_3 score to 1.0\n", + "setting pred name Generator_outputy_pred_1 score to 1.0\n", + "Optimizer propose...\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. Double-check your final count. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 5035.06it/s]\n", + "Evaluating step(62): 0.9286 across 42 samples, Max potential: 0.94: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:00<00:00, 327.19it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizer revert: 0.9069767441860465 <= 0.94\n", + "Saving checkpoint to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Training Step: 62: 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [04:51<00:24, 24.28s/it]\n", + "Epoch: 0%| | 0/1 [04:51\n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. 
The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. 
The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {{system_prompt}}\n", + " {# Few shot demos #}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n", + ")\n", + "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n", + "Configuring teacher generator for Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. 
Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + ")\n", + "Teacher generator set: Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + "), teacher Generator(\n", + " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n", + " (prompt): Prompt(\n", + " template: \n", + " You are the feedback engine in an optimization system.\n", + " \n", + " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n", + " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n", + " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n", + " 3. 
Consider the variable in the context of its peers if provided.\n", + " Remember:\n", + " Be concise, critical, and direct.\n", + " \n", + " \n", + " {{conversation_sec}}\n", + " \n", + " {{objective_instruction_sec}}\n", + " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n", + " )\n", + " (model_client): OpenAIClient()\n", + ")\n", + "Teacher generator configured.\n", + "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n", + "Starting step: 62\n", + "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training Step: 63: 0%| | 0/12 [00:00 0.94\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:00<00:00, 3294.35it/s]\n", + "Evaluating step(65): 0.95 across 100 samples, Max potential: 0.95: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 100/100 [00:39<00:00, 2.51it/s]\n", + "Training Step: 66: 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [01:50<02:42, 23.20s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Step: 66\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 186.04it/s]\n", + "Training: 25%|β–ˆβ–ˆβ–Œ | 1/4 [00:00<00:02, 1.01it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trace with id fe9b883c-4f47-44f7-a388-b03a2fb10413 already exists. Updating the trace.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\rTraining: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:01<00:01, 1.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trace with id 12a6ff3d-f54d-4d89-b5f0-1aec30e96398 already exists. Updating the trace.\n", + "Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:02<00:00, 1.46it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 636.54it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 9420.11it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 111.34it/s]\n", + "Training: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:01<00:00, 2.50it/s]\n", + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 321.28it/s]\n", + "Calculating Loss: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:00<00:00, 731.61it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n", + "New prompts: [PromptData(id='327b63f0-b532-435a-85d7-6137d4e52c4c', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. List each item individually and ensure accuracy. 
Show your calculations step by step. The last line of your response should be: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='73a3953b-6351-44d8-a36f-7521db346cca', name='llm_counter.few_shot_demos', data=\"input_str: I have a yam, a cauliflower, a bed, two cabbages, a garlic, an oven, a\\n carrot, a head of broccoli, a potato, a stalk of celery, a lettuce head, and a toaster.\\n How many vegetables do I have?\\nExample: 'Let''s list and count each vegetable individually:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Now, let''s verify the count:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (1)\\n\\n 4. Cabbage (2)\\n\\n 5. Garlic\\n\\n 6. Carrot\\n\\n 7. Broccoli\\n\\n 8. Potato\\n\\n 9. Celery\\n\\n 10. Lettuce\\n\\n\\n Total number of vegetables: 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 1120.89it/s]\n", + "Evaluating step(66): 0.96 across 50 samples, Max potential: 0.96: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 50/50 [00:00<00:00, 212.00it/s]\n", + "Training Step: 67: 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [01:55<01:32, 18.51s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail validation: 0.96 <= 0.96, revert\n", + "Training Step: 67\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Loading Data: 0%| | 0/4 [00:00Star us on GitHub ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "GitHub repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "# Author\n", + "\n", + "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder).\n", + "\n", + "# Outline\n", + "\n", + "This is a quick introduction to what AdalFlow is capable of. We will cover:\n", + "\n", + "* How to use `DataClass` with `DataClassParser`.\n", + "* How to use nested dataclasses; we will test both one and two levels of nesting.\n", + "\n", + "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", + "\n", + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai` and `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. 
Set up the `openai` and `groq` API keys in the environment variables." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Ab_OmE6XTl4h" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,datasets]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "PbAIsBeeTQUk" + }, + "outputs": [], + "source": [ + "import re\n", + "from adalflow.core import Component, Generator\n", + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.components.model_client import GroqAPIClient\n", + "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kRymwpwHTQUm", + "outputId": "6a992f52-1661-4002-ef74-ed26938c6baa" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please enter your OpenAI API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n", + "API keys have been set.\n" + ] + } + ], + "source": [ + "from getpass import getpass\n", + "import os\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "czGDvnVUTQUm" + }, + "outputs": [], + "source": [ + "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PPs3gHqeTQUn" + }, + "source": [ + "Let's turn on the library log to help with debugging.",
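+ "\n", + "\n", + "If you prefer to configure logging yourself, here is a minimal sketch using only Python's built-in `logging` module (independent of AdalFlow's `get_logger` helper shown in the next cell):\n", + "\n", + "```python\n", + "import logging\n", + "\n", + "# Configure the root logger so INFO-level records from adalflow\n", + "# (and other libraries) are printed to the notebook output.\n", + "logging.basicConfig(level=logging.INFO)\n", + "```"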
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "98QNsOcSTQUn", + "outputId": "d63cba1b-6087-4b04-bb2b-0a9d9d4500a5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from adalflow.utils import get_logger\n", + "get_logger()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "b3ey1lozTQUo" + }, + "outputs": [], + "source": [ + "#Toy example\n", + "\n", + "class DocQA(Component):\n", + " def __init__(self):\n", + " super(DocQA, self).__init__()\n", + " self.doc = Generator(\n", + " template=template_doc,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", + " )\n", + "\n", + " def call(self, query: str) -> str:\n", + " return self.doc(prompt_kwargs={\"input_str\": query}).data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TZAHSrbUTQUo", + "outputId": "66e81fb3-17f9-4570-dbbd-681cad1afc65" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:40:52 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n", + "2024-11-11 17:40:52 - generator - INFO - [generator.py:144:__init__] - Generator Generator initialized.\n" + ] + } + ], + "source": [ + "doc = DocQA()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "f-y6l44PTQUp", + "outputId": "e24aabd5-d758-4700-fa0d-46b66a88c412" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'type': 'DocQA', 'data': {'_components': {'_ordered_dict': True, 'data': [('doc', {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 
'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'DocQA', '_init_args': {}}}\n" + ] + }, + { + "data": { + "text/plain": [ + "{'_components': OrderedDict([('doc',\n", + " Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " ))]),\n", + " '_parameters': OrderedDict(),\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# states\n", + "states = doc.to_dict()\n", + "print(states)\n", + "doc.__dict__" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z_sH59_bTQUp" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "P81kIS2qTQUp", + "outputId": "d8e0e398-d704-4a85-8692-66a8c570b910" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Generator, {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': 
{}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Prompt, {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n" + ] + }, + { + "data": { + "text/plain": [ + "{'_components': OrderedDict([('doc',\n", + " Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " ))]),\n", + " '_parameters': OrderedDict(),\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# restore the states\n", + "doc2 = DocQA.from_dict(states)\n", + "# print(doc2.call(\"What is the capital of France?\"))\n", + "doc2.__dict__\n", + "# doc2.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "198xYpLGTQUp", + "outputId": "ffd33d12-6db0-45c2-dfb1-3d57460ad4c9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'type': 'DocQA',\n", + " 'data': {'_components': {'_ordered_dict': True,\n", + " 'data': [('doc',\n", + " {'type': 'Generator',\n", + " 
'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", + " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", + " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", + " 'cache': ,\n", + " '_components': {'_ordered_dict': True,\n", + " 'data': [('prompt',\n", + " {'type': 'Prompt',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Prompt',\n", + " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_variables': ['input_str'],\n", + " 'prompt_kwargs': {}}}),\n", + " ('model_client',\n", + " {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Generator',\n", + " '_init_args': {'model_client': None,\n", + " 'model_kwargs': {},\n", + " 'template': None,\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " 'backward_engine': None,\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'output_processors': None,\n", + " 'mock_output': False,\n", + " 'mock_output_data': 'mock data',\n", + " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", + " '_use_cache': False,\n", + " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " '_teacher': None}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc2.to_dict() == doc.to_dict()\n", + "doc2.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ulb1OWxxTQUq", + "outputId": "99972fcd-ed52-43b4-e461-a76c19bd9522" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "2024-11-11 17:41:29 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", + "2024-11-11 17:41:30 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:41:30 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', metadata=None)\n", + "As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.\n" + ] + } + ], + "source": [ + "print(doc(\"What is the best treatment for headache?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "POVal8CgTQUq", + "outputId": "2fadb1d6-b858-4964-9045-8ea7454178e3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:41:35 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", + "2024-11-11 17:41:36 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:41:36 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', metadata=None)\n", + "As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.\n" + ] + } + ], + "source": [ + "print(doc2(\"What is the best treatment for headache?\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5gTO1-8TQUr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jhgSpKrMTQUr", + "outputId": "15615bf7-2b72-4ac7-d1fe-f436a7304734" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('', DocQA(\n", + " (doc): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " )\n", + "))\n", + "('doc', Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + "))\n", + "('doc.prompt', Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str']))\n", + "('doc.model_client', OpenAIClient())\n" + ] + } + ], + "source": [ + "# list other subcomponents\n", + "\n", + "for subcomponent in doc.named_components():\n", + " print(subcomponent)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XjIHAY6bTQUr" + }, + "source": [ + "Let's add a parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "vxgjAUiFTQUr" + }, + "outputs": [], + "source": [ + "from adalflow.optim.parameter import Parameter\n", + "\n", + "doc.register_parameter(\"demo\", param=Parameter(data=\"demo\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "86C-h1e1TQUr", + "outputId": "57cab4d0-eddf-433d-e364-5d7f07072fbf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('demo', Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))\n" + ] + } + ], + "source": [ + "# list all parameters\n", + "for param in doc.named_parameters():\n", + " print(param)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_s2MPukiTQUr", + "outputId": "b51c7d09-fb52-42d9-b2d5-4f44f5d22dc9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'type': 'DocQA',\n", + " 'data': {'_components': {'_ordered_dict': True,\n", + " 'data': [('doc',\n", + " {'type': 'Generator',\n", + " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", + " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", + " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", + " 'cache': ,\n", + " '_components': {'_ordered_dict': True,\n", + " 'data': [('prompt',\n", + " {'type': 'Prompt',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Prompt',\n", + " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_variables': ['input_str'],\n", + " 'prompt_kwargs': {}}}),\n", + " ('model_client',\n", + " {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Generator',\n", + " '_init_args': {'model_client': None,\n", + " 'model_kwargs': {},\n", + " 'template': None,\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " 'backward_engine': None,\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'output_processors': None,\n", + " 'mock_output': False,\n", + " 'mock_output_data': 'mock data',\n", + " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", + " '_use_cache': False,\n", + " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " '_teacher': None}})]},\n", + " '_parameters': {'_ordered_dict': True,\n", + " 'data': [('demo',\n", + " {'name': 'param_313f196d-3c48-4eb3-8138-b7bd74298fbd',\n", + " 'role_desc': '',\n", + " 'data': 'demo',\n", + " 'requires_opt': True,\n", + " 'param_type': 'none ()',\n", + " 'predecessors': [],\n", + " 
'gradients': [],\n", + " 'previous_data': None,\n", + " 'gradients_context': [],\n", + " 'grad_fn': 'None',\n", + " 'gradient_prompt': 'None',\n", + " 'raw_response': None,\n", + " 'score': None,\n", + " 'traces': {},\n", + " 'input_args': None,\n", + " 'demos': []})]},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "mcIO1DuVTQUr" + }, + "outputs": [], + "source": [ + "from adalflow.utils.file_io import save_json\n", + "\n", + "save_json(doc.to_dict(), \"doc.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0vvO0nogTQUr", + "outputId": "59131d9e-a996-4c8b-f32c-9a6a623d3db6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "OrderedDict([('demo',\n", + " Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.state_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 125 + }, + "id": "uroqi93tTQUs", + "outputId": "8a3e4ecc-1368-475b-dc4d-2ff38821b8ac" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:42:18 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for a cold?'}]}\n", + "2024-11-11 17:42:19 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:42:19 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=85, prompt_tokens=28, total_tokens=113), raw_response='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', metadata=None)\n" + ] + }, + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. 
Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.call(\"What is the best treatment for a cold?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mYSDr462TQUs", + "outputId": "82414c82-8feb-4667-90ed-91c594cc6a73" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "<class 'adalflow.core.component.FunComponent'>\n" + ] + } + ], + "source": [ + "from adalflow.core.component import FunComponent\n", + "\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "fun_component = FunComponent(add_one)\n", + "print(fun_component(1))\n", + "print(type(fun_component))\n", + "\n", + "# output:\n", + "# 2\n", + "# <class 'adalflow.core.component.FunComponent'>" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3MW1tpzRTQUs", + "outputId": "351b8922-1423-434a-f470-ff435a1962d2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "<class 'adalflow.core.component.AddOneComponent'>\n" + ] + } + ], + "source": [ + "from adalflow.core.component import fun_to_component\n", + "\n", + "fun_component = fun_to_component(add_one)\n", + "print(fun_component(1))\n", + "print(type(fun_component))\n", + "\n", + "# output:\n", + "# 2\n", + "# <class 'adalflow.core.component.AddOneComponent'>" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dxAoGrnQTQUs", + "outputId": "38c462a3-5abf-41f4-9231-746c8d0ffcb3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "<class 'adalflow.core.component.AddOneComponent'>\n" + ] + } + ], + "source": [ + "# use it as a decorator\n", + "@fun_to_component\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "print(add_one(1))\n", + "print(type(add_one))\n", + "\n", + "# output:\n", + "# 2\n", + "# <class 'adalflow.core.component.AddOneComponent'>" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7BvJEP_mTQUs", + "outputId": "066281b8-a650-4c48-c786-312022198015" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:42:39 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache? Please be concise and only list the top treatments.'}]}\n", + "2024-11-11 17:42:40 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:42:40 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', error=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=37, total_tokens=74), raw_response='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', metadata=None)\n", + "The top treatments for headache are rest, hydration, over-the-counter pain relievers such as
ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.\n" + ] + } + ], + "source": [ + "from adalflow.core import Sequential\n", + "\n", + "@fun_to_component\n", + "def enhance_query(query: str) -> str:\n", + " return query + \" Please be concise and only list the top treatments.\"\n", + "\n", + "seq = Sequential(enhance_query, doc)\n", + "\n", + "query = \"What is the best treatment for headache?\"\n", + "print(seq(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aoZ2w8RUTQUt", + "outputId": "115d0ccf-33d1-4464-a951-cf9f5476284b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Sequential(\n", + " (0): EnhanceQueryComponent(fun_name=enhance_query)\n", + " (1): DocQA(\n", + " (doc): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + ")" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seq" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-ffAlC6TQUt" + }, + "source": [ + "# TODO: LLM for single choices" + ] + },
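{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Until that TODO is filled in, the next cell is a minimal, untested sketch of a single-choice component. It only reuses the `Component` + `Generator` pattern from `DocQA` above; the class name and template wording are illustrative assumptions, not an AdalFlow API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hedged sketch for the TODO above (not executed): pick one answer from a fixed set.\n", + "# It reuses the Component + Generator pattern from DocQA; the template text is an assumption.\n", + "from adalflow.core import Component, Generator\n", + "from adalflow.components.model_client import OpenAIClient\n", + "\n", + "class SingleChoiceQA(Component):\n", + " def __init__(self):\n", + " super().__init__()\n", + " template = \"Answer with exactly one of the choices.\\nQuestion: {{question}}\\nChoices: {{choices}}\\nAnswer:\"\n", + " self.generator = Generator(\n", + " model_client=OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", + " template=template,\n", + " )\n", + "\n", + " def call(self, question: str, choices: list) -> str:\n", + " # Render the choices into the prompt and return only the text answer\n", + " output = self.generator.call({\"question\": question, \"choices\": \", \".join(choices)})\n", + " return output.data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/tutorials/adalflow_dataclasses.ipynb b/notebooks/tutorials/adalflow_dataclasses.ipynb new file mode 100644 index 00000000..3c96ffe5 --- /dev/null +++ b/notebooks/tutorials/adalflow_dataclasses.ipynb @@ -0,0 +1,963 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "hGLYrUwBmvUD" + }, + "source": [ + "\n", + " \"Open\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gHK6HFngl6iP" + }, + "source": [ + "# πŸ€— Welcome to AdalFlow!\n", + "## The library to build & auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊. Any questions or concerns you may have, [come talk to us on Discord](https://discord.gg/ezzszrRZvT), we're always here to help!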
⭐ Star us on Github ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "# Author\n", + "\n", + "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder).\n", + "\n", + "# Outline\n", + "\n", + "This is a quick introduction to what AdalFlow is capable of. We will cover:\n", + "\n", + "* How to use `DataClass` with `DataClassParser`.\n", + "* How to use nested dataclasses; we will test both one and two levels of nesting.\n", + "\n", + "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", + "\n", + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai` and `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. Set up the `openai` and `groq` API keys in the environment variables" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nqe-vxB1BCux" + }, + "source": [ + "### Install adalflow" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "ZaaevxNH9JMQ" + }, + "outputs": [], + "source": [ + "# Install adalflow with necessary dependencies\n", + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NGE70aZ8BLuf" + }, + "source": [ + "### Set Environment Variables\n", + "\n", + "Note: Enter your API keys in the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j2xmGr_99YDq", + "outputId": "c3d1e0b7-9072-412e-fed1-4578404357be" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting .env\n" + ] + } + ], + "source": [ + "%%writefile .env\n", + "\n", + "OPENAI_API_KEY=\"PASTE-OPENAI_API_KEY_HERE\"\n", + "GROQ_API_KEY=\"PASTE-GROQ_API_KEY-HERE\"" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API keys have been set.\n" + ] + } + ], + "source": [ + "# or more securely\n", + "\n", + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "\n", + "# Set environment variables\n", + "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZxBkm77uBZpl" + }, + "source": [ + "### Import necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "wOAiKg899Z2u" + }, + "outputs": [], + "source": [ + "# Import required libraries\n", + "from dataclasses import dataclass, field\n", + "from typing import List, Dict\n", + "import adalflow as adal\n", + "from
adalflow.components.model_client import GroqAPIClient\n", + "from adalflow.utils import setup_env" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.2.4'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adal.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bTzgyp6S9bnH" + }, + "outputs": [], + "source": [ + "# Load environment variables - make sure OPENAI_API_KEY is set in the .env file and .env is in the current folder\n", + "setup_env(\".env\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MBW5viOG9hM8" + }, + "source": [ + "### Basic Vanilla Example" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "YA4pAIek9ewc" + }, + "outputs": [], + "source": [ + "# Define the output structure using dataclass\n", + "@dataclass\n", + "class BasicQAOutput(adal.DataClass):\n", + " explanation: str = field(\n", + " metadata={\"desc\": \"A brief explanation of the concept in one sentence.\"}\n", + " )\n", + " example: str = field(\n", + " metadata={\"desc\": \"An example of the concept in a sentence.\"}\n", + " )\n", + " # Control output fields order\n", + " __output_fields__ = [\"explanation\", \"example\"]\n", + "\n", + "# Define the template using jinja2 syntax\n", + "qa_template = r\"\"\"\n", + "You are a helpful assistant.\n", + "\n", + "{{output_format_str}}\n", + "\n", + "\n", + " {{input_str}} \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "x4__jnbP9luN" + }, + "outputs": [], + "source": [ + "# Define the QA component\n", + "class QA(adal.Component):\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", + " super().__init__()\n", + "\n", + " # Initialize the parser with the output dataclass\n", + " parser = adal.DataClassParser(data_class=BasicQAOutput, return_data_class=True)\n", + "\n", + " # Set up the generator with model, template, and parser\n", + " self.generator = adal.Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " template=qa_template,\n", + " prompt_kwargs={\"output_format_str\": parser.get_output_format_str()},\n", + " output_processors=parser,\n", + " )\n", + "\n", + " def call(self, query: str):\n", + " \"\"\"Synchronous call to generate response\"\"\"\n", + " return self.generator.call({\"input_str\": query})\n", + "\n", + " async def acall(self, query: str):\n", + " \"\"\"Asynchronous call to generate response\"\"\"\n", + " return await self.generator.acall({\"input_str\": query})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "TVi3rGvs9nte" + }, + "outputs": [], + "source": [ + "# Example usage\n", + "def run_basic_example():\n", + " # Instantiate the QA class with Groq model\n", + " qa = QA(\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " )\n", + "\n", + " # Print the QA instance details\n", + " print(qa)\n", + "\n", + " # Test the QA system\n", + " response = qa(\"What is LLM?\")\n", + " print(\"\\nResponse:\")\n", + " print(response)\n", + " print(f\"BasicQAOutput: {response.data}\")\n", + " print(f\"Explanation: {response.data.explanation}\")\n", + " print(f\"Example: {response.data.example}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", +
"output_type": "stream", + "text": [ + "QA(\n", + " (generator): Generator(\n", + " model_kwargs={'model': 'llama3-8b-8192'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(\n", + " template: \n", + " You are a helpful assistant.\n", + " \n", + " {{output_format_str}}\n", + " \n", + " \n", + " {{input_str}} , prompt_kwargs: {'output_format_str': 'Your output should be formatted as a standard JSON instance with the following schema:\\n```\\n{\\n \"explanation\": \"A brief explanation of the concept in one sentence. (str) (required)\",\\n \"example\": \"An example of the concept in a sentence. (str) (required)\"\\n}\\n```\\n-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!\\n-Use double quotes for the keys and string values.\\n-DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\\n-Follow the JSON formatting conventions.'}, prompt_variables: ['input_str', 'output_format_str']\n", + " )\n", + " (model_client): GroqAPIClient()\n", + " (output_processors): DataClassParser(\n", + " data_class=BasicQAOutput, format_type=json, return_data_class=True, input_fields=[], output_fields=['explanation', 'example']\n", + " (_output_processor): JsonParser()\n", + " (output_format_prompt): Prompt(\n", + " template: Your output should be formatted as a standard JSON instance with the following schema:\n", + " ```\n", + " {{schema}}\n", + " ```\n", + " -Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!\n", + " -Use double quotes for the keys and string values.\n", + " -DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\n", + " -Follow the JSON formatting conventions., prompt_variables: ['schema']\n", + " )\n", + " )\n", + " )\n", + ")\n", + "\n", + "Response:\n", + "GeneratorOutput(id=None, data=BasicQAOutput(explanation='Large Language Model (LLM) is a type of artificial intelligence designed to process and generate human-like language', example='The new LLM-powered chatbot was able to understand and respond to complex user queries with high accuracy'), error=None, usage=CompletionUsage(completion_tokens=60, prompt_tokens=174, total_tokens=234), raw_response='```\\n{\\n \"explanation\": \"Large Language Model (LLM) is a type of artificial intelligence designed to process and generate human-like language\",\\n \"example\": \"The new LLM-powered chatbot was able to understand and respond to complex user queries with high accuracy\"\\n}\\n```', metadata=None)\n", + "BasicQAOutput: BasicQAOutput(explanation='Large Language Model (LLM) is a type of artificial intelligence designed to process and generate human-like language', example='The new LLM-powered chatbot was able to understand and respond to complex user queries with high accuracy')\n", + "Explanation: Large Language Model (LLM) is a type of artificial intelligence designed to process and generate human-like language\n", + "Example: The new LLM-powered chatbot was able to understand and respond to complex user queries with high accuracy\n" + ] + } + ], + "source": [ + "run_basic_example()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1n7edLQ19ql8" + }, + "source": [ + "### Example 1 - Movie analysis data class" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "5Arp4-Dq9u49" + }, + "outputs": [], + "source": [ + "# 1. 
Basic DataClass with different field types\n", + "@dataclass\n", + "class MovieReview(adal.DataClass):\n", + " title: str = field(\n", + " metadata={\"desc\": \"The title of the movie\"}\n", + " )\n", + " rating: float = field(\n", + " metadata={\n", + " \"desc\": \"Rating from 1.0 to 10.0\",\n", + " \"min\": 1.0,\n", + " \"max\": 10.0\n", + " }\n", + " )\n", + " pros: List[str] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of positive points about the movie\"}\n", + " )\n", + " cons: List[str] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of negative points about the movie\"}\n", + " )\n", + "\n", + " __output_fields__ = [\"title\", \"rating\", \"pros\", \"cons\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "VLbRUzXg9yP0" + }, + "outputs": [], + "source": [ + "\n", + "@dataclass\n", + "class Actor(adal.DataClass):\n", + " name: str = field(metadata={\"desc\": \"Actor's full name\"})\n", + " role: str = field(metadata={\"desc\": \"Character name in the movie\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "7MUcu0tk91l4" + }, + "outputs": [], + "source": [ + "# 2. Nested DataClass example\n", + "\n", + "# Have both MovieReview and Actor nested in DetailedMovieReview\n", + "\n", + "@dataclass\n", + "class DetailedMovieReview(adal.DataClass):\n", + " basic_review: MovieReview\n", + " cast: List[Actor] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of main actors in the movie\"}\n", + " )\n", + " genre: List[str] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of genres for the movie\"}\n", + " )\n", + " recommend: bool = field(\n", + " default=False,\n", + " metadata={\"desc\": \"Whether you would recommend this movie\"}\n", + " )\n", + "\n", + " __output_fields__ = [\"basic_review\", \"cast\", \"genre\", \"recommend\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Example template for movie review\n", + "movie_review_template = r\"\"\"\n", + "You are a professional movie critic.
Analyze the given movie and provide a detailed review.\n", + "\n", + "{{output_format_str}}\n", + "\n", + "\n", + " Review this movie: {{movie_title}} \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the MovieReviewer component with a configurable data class\n", + "class MovieReviewer(adal.Component):\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict, data_class: adal.DataClass):\n", + " super().__init__()\n", + " self.additional_structure_prompt = \"Don't use 'type' and 'properties' in the output; directly give it as a dict\"\n", + " parser = adal.DataClassParser(\n", + " data_class=data_class,\n", + " return_data_class=True\n", + " )\n", + " self.generator = adal.Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " template=movie_review_template,\n", + " prompt_kwargs={\"output_format_str\": parser.get_output_format_str() + self.additional_structure_prompt},\n", + " output_processors=parser,\n", + " )\n", + "\n", + " def call(self, movie_title: str):\n", + " return self.generator.call({\"movie_title\": movie_title})" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DetailedMovieReview: DetailedMovieReview(basic_review=MovieReview(title='The Matrix', rating=8.5, pros=['Groundbreaking special effects', 'Intriguing story with complex themes', 'Well-developed characters', 'Excellent world-building'], cons=['Pacing can be slow in some parts']), cast=[Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')], genre=['Science Fiction', 'Action'], recommend=True)\n", + "BasicReview: MovieReview(title='The Matrix', rating=8.5, pros=['Groundbreaking special effects', 'Intriguing story with complex themes', 'Well-developed characters', 'Excellent world-building'], cons=['Pacing can be slow in some parts'])\n", + "Cast: [Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')]\n" + ] + } + ], + "source": [ + "# test the data class with one level of nesting\n", + "\n", + "reviewer = MovieReviewer(\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " data_class=DetailedMovieReview\n", + ")\n", + "\n", + "response = reviewer(\"The Matrix\")\n", + "print(f\"DetailedMovieReview: {response.data}\")\n", + "print(f\"BasicReview: {response.data.basic_review}\")\n", + "print(f\"Cast: {response.data.cast}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DetailedMovieReview: DetailedMovieReview(basic_review=MovieReview(title='The Matrix', rating=9.0, pros=['Innovative special effects and action sequences', 'Thought-provoking storyline', 'Engaging cyberpunk aesthetic', 'Strong performances from the cast', 'Iconic fight choreography'], cons=['Complex narrative that may confuse some viewers', 'Some dated CGI when compared to modern standards']), cast=[Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity'), Actor(name='Hugo Weaving', role='Agent Smith')], genre=['Science Fiction', 'Action', 'Adventure'], recommend=True)\n", + "BasicReview: MovieReview(title='The Matrix',
rating=9.0, pros=['Innovative special effects and action sequences', 'Thought-provoking storyline', 'Engaging cyberpunk aesthetic', 'Strong performances from the cast', 'Iconic fight choreography'], cons=['Complex narrative that may confuse some viewers', 'Some dated CGI when compared to modern standards'])\n", + "Cast: [Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity'), Actor(name='Hugo Weaving', role='Agent Smith')]\n" + ] + } + ], + "source": [ + "# try the OpenAI model\n", + "reviewer = MovieReviewer(\n", + " model_client=adal.OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-4o\"},\n", + " data_class=DetailedMovieReview\n", + ")\n", + "response = reviewer(\"The Matrix\")\n", + "print(f\"DetailedMovieReview: {response.data}\")\n", + "print(f\"BasicReview: {response.data.basic_review}\")\n", + "print(f\"Cast: {response.data.cast}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that both models can handle one level of nested dataclasses quite well, and the output ordering follows the order specified in `__output_fields__`, as the next cell shows." + ] + },
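{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to verify the ordering claim, you can print the parser's format string; this only reuses the `DataClassParser` API from the cells above (not executed here):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optional check: the schema string produced by DataClassParser lists the\n", + "# fields in __output_fields__ order (reusing the API from the cells above).\n", + "parser = adal.DataClassParser(data_class=DetailedMovieReview, return_data_class=True)\n", + "print(parser.get_output_format_str())" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "ekr4v8Xg93en" + }, + "outputs": [], + "source": [ + "# 3. Second-level nested dataclass\n", + "\n", + "@dataclass\n", + "class MovieAnalysis(adal.DataClass):\n", + " review: DetailedMovieReview\n", + " box_office: float = field(\n", + " default=None,\n", + " metadata={\"desc\": \"Box office earnings in millions of dollars\"}\n", + " )\n", + " awards: Dict[str, int] = field(\n", + " default=None,\n", + " metadata={\"desc\": \"Dictionary of award categories and number of wins\"}\n", + " )\n", + "\n", + " __output_fields__ = [\"review\", \"box_office\", \"awards\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MovieAnalysis: MovieAnalysis(review=DetailedMovieReview(basic_review=MovieReview(title='The Matrix', rating=9.5, pros=['Innovative concept', 'Mind-bending plot', 'Impressive action sequences'], cons=['Some overly complex dialogue', 'Ending leaves room for interpretation']), cast=[Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')], genre=['Action', 'Science Fiction'], recommend=True), box_office=463.5, awards={'Best Visual Effects': 4, 'Best Film Editing': 2, 'Best Sound': 1})\n", + "DetailedMovieReview: DetailedMovieReview(basic_review=MovieReview(title='The Matrix', rating=9.5, pros=['Innovative concept', 'Mind-bending plot', 'Impressive action sequences'], cons=['Some overly complex dialogue', 'Ending leaves room for interpretation']), cast=[Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')], genre=['Action', 'Science Fiction'], recommend=True)\n", + "BasicReview: MovieReview(title='The Matrix', rating=9.5, pros=['Innovative concept', 'Mind-bending plot', 'Impressive action sequences'], cons=['Some overly complex dialogue', 'Ending leaves room for interpretation'])\n", + "Cast: [Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')]\n" + ] + } + ], + "source": [ + "# test the data class with two levels of nested dataclass\n", + "\n", + "# gpt-3.5-turbo model\n", + "\n", + "analysis =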
MovieReviewer(\n", + " model_client=adal.OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", + " data_class=MovieAnalysis\n", + ")\n", + "\n", + "response = analysis(\"The Matrix\")\n", + "print(f\"MovieAnalysis: {response.data}\")\n", + "print(f\"DetailedMovieReview: {response.data.review}\")\n", + "print(f\"BasicReview: {response.data.review.basic_review}\")\n", + "print(f\"Cast: {response.data.review.cast}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MovieAnalysis: MovieAnalysis(review=DetailedMovieReview(basic_review=MovieReview(title='The Matrix', rating=9.5, pros=['Groundbreaking special effects', 'Thought-provoking themes', 'Innovative storyline', 'Strong performances from the cast'], cons=['Somewhat slow pacing in parts']), cast=[Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')], genre=['Science Fiction', 'Action', 'Adventure'], recommend=True), box_office=463.5, awards={'Academy Awards': 4, 'MTV Movie Awards': 10, 'Saturn Awards': 7})\n", + "DetailedMovieReview: DetailedMovieReview(basic_review=MovieReview(title='The Matrix', rating=9.5, pros=['Groundbreaking special effects', 'Thought-provoking themes', 'Innovative storyline', 'Strong performances from the cast'], cons=['Somewhat slow pacing in parts']), cast=[Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')], genre=['Science Fiction', 'Action', 'Adventure'], recommend=True)\n", + "BasicReview: MovieReview(title='The Matrix', rating=9.5, pros=['Groundbreaking special effects', 'Thought-provoking themes', 'Innovative storyline', 'Strong performances from the cast'], cons=['Somewhat slow pacing in parts'])\n", + "Cast: [Actor(name='Keanu Reeves', role='Neo'), Actor(name='Laurence Fishburne', role='Morpheus'), Actor(name='Carrie-Anne Moss', role='Trinity')]\n" + ] + } + ], + "source": [ + "# test the data class with two levels of nested dataclass\n", + "\n", + "analysis = MovieReviewer(\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " data_class=MovieAnalysis\n", + ")\n", + "\n", + "response = analysis(\"The Matrix\")\n", + "print(f\"MovieAnalysis: {response.data}\")\n", + "print(f\"DetailedMovieReview: {response.data.review}\")\n", + "print(f\"BasicReview: {response.data.review.basic_review}\")\n", + "print(f\"Cast: {response.data.review.cast}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pSTrf8_t-DCx" + }, + "source": [ + "### Example 2: Song Review\n", + "Note: The song review is adapted from Example 1 (the movie review) to show how to reuse the same DataClass patterns for a similar task." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "7g9bUa0q-B6Y" + }, + "outputs": [], + "source": [ + "# 1.
Basic DataClass with different field types\n", + "@dataclass\n", + "class SongReview(adal.DataClass):\n", + " title: str = field(\n", + " metadata={\"desc\": \"The title of the song\"}\n", + " )\n", + " album: str = field(\n", + " metadata={\"desc\": \"The album of the song\"}\n", + " )\n", + " ranking: int = field(\n", + " metadata={\n", + " \"desc\": \"Billboard peak ranking from 1 to 200\",\n", + " \"min\": 1,\n", + " \"max\": 200\n", + " }\n", + " )\n", + " streaming: Dict[str, int] = field(\n", + " default_factory=dict,\n", + " metadata={\"desc\": \"Dict of the latest approximate streaming counts on Spotify and YouTube, in millions\"}\n", + " )\n", + " pros: List[str] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of positive points about the song\"}\n", + " )\n", + " cons: List[str] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of negative points about the song\"}\n", + " )\n", + "\n", + " __output_fields__ = [\"title\", \"album\", \"ranking\", \"streaming\", \"pros\", \"cons\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "UGhMRZht-HiB" + }, + "outputs": [], + "source": [ + "\n", + "@dataclass\n", + "class Artist(adal.DataClass):\n", + " name: str = field(metadata={\"desc\": \"Artist's full name\"})\n", + " role: str = field(metadata={\"desc\": \"Artist's role in the song\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "sfNWgPYN-JAj" + }, + "outputs": [], + "source": [ + "# 2. Nested DataClass example\n", + "\n", + "@dataclass\n", + "class DetailedSongReview(adal.DataClass):\n", + " basic_review: SongReview = field(\n", + " default=SongReview, metadata={\"desc\": \"basic Song review details\"}\n", + " )\n", + " cast: List[Artist] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of the main singer, lyricist, and musicians in the song\"}\n", + " )\n", + " genre: List[str] = field(\n", + " default_factory=list,\n", + " metadata={\"desc\": \"List of genres for the song\"}\n", + " )\n", + " recommend: bool = field(\n", + " default=False,\n", + " metadata={\"desc\": \"Whether you would recommend this song\"}\n", + " )\n", + "\n", + " __output_fields__ = [\"basic_review\", \"cast\", \"genre\", \"recommend\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "HG8rtCd8-K7t" + }, + "outputs": [], + "source": [ + "# 3. Two levels of nested dataclass\n", + "\n", + "# Because all of these fields have defaults, they are optional, so the\n", + "# LLM might not output a field if it does not have the information\n", + "\n", + "@dataclass\n", + "class SongAnalysis(adal.DataClass):\n", + " review: DetailedSongReview = field(\n", + " default=DetailedSongReview, metadata={\"desc\": \"Song review details\"}\n", + " )\n", + " duration: float = field(\n", + " default=None,\n", + " metadata={\"desc\": \"Duration of the song\"}\n", + " )\n", + " awards: Dict[str, int] = field(\n", + " default=None,\n", + " metadata={\"desc\": \"Dictionary of award categories and number of wins\"}\n", + " )\n", + "\n", + " __output_fields__ = [\"review\", \"duration\", \"awards\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "v3mNeyz7-MpY" + }, + "outputs": [], + "source": [ + "# Example template for song review\n", + "song_review_template = r\"\"\"\n", + "You are a professional song critic.
Analyze the given song and provide a detailed review.\n", + "\n", + "{{output_format_str}}\n", + "\n", + "\n", + " Review this song: {{song_title}} \"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "X2eifXOU-OrE" + }, + "outputs": [], + "source": [ + "# Create the SongReviewer component with SongAnalysis data class\n", + "class SongReviewer(adal.Component):\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", + " super().__init__()\n", + " self.additional_structure_prompt = \"Don't use 'type' and 'properties' in the output; directly give it as a dict\"\n", + " parser = adal.DataClassParser(\n", + " data_class=SongAnalysis,\n", + " return_data_class=False,\n", + " format_type=\"json\"\n", + " )\n", + " self.generator = adal.Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " template=song_review_template,\n", + " prompt_kwargs={\"output_format_str\": parser.get_output_format_str() + self.additional_structure_prompt },\n", + " output_processors=parser,\n", + " )\n", + "\n", + " def call(self, song_title: str):\n", + " return self.generator.call({\"song_title\": song_title})" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SongAnalysis: {'review': {'basic_review': {'title': 'Shape of You', 'album': 'Γ· (Divide)', 'ranking': 7, 'streaming': {'spotify': 4.5, 'youtube': 2.5}, 'pros': ['Catchy beat', 'Catchy melody', 'Funky rhythm', 'Great lyrics'], 'cons': ['Some may find the lyrics objectifying', 'Not typically my cup of tea']}, 'cast': [{'name': 'Ed Sheeran', 'role': 'Lead vocals, songwriting'}], 'genre': ['Pop', 'Dance', 'Electro'], 'recommend': True}, 'duration': 3.53}\n" + ] + } + ], + "source": [ + "analysis = SongReviewer(\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + ")\n", + "\n", + "response = analysis(\"Shape of you\")\n", + "print(f\"SongAnalysis: {response.data}\")\n", + "\n", + "# this time, as we set `return_data_class` to False in the parser, we get the output as a dict" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Song Title: Shape of You\n", + "Album: Γ· (Divide)\n", + "Ranking: 7\n", + "- spotify - 4.5 million views\n", + "- youtube - 2.5 million views\n", + "\n", + "Pros:\n", + "- Catchy beat\n", + "- Catchy melody\n", + "- Funky rhythm\n", + "- Great lyrics\n", + "\n", + "Artists:\n", + "- Ed Sheeran as Lead vocals, songwriting\n", + "\n", + "Genre:\n", + " Pop \n", + " Dance \n", + " Electro \n", + "\n", + "Duration: 3.53 minutes\n" + ] + } + ], + "source": [ + "# Access nested data\n", + "analysis = response.data\n", + "print(f\"Song Title: {analysis['review']['basic_review']['title']}\")\n", + "print(f\"Album: {analysis['review']['basic_review']['album']}\")\n", + "print(f\"Ranking: {analysis['review']['basic_review']['ranking']}\")\n", + "\n", + "for platform, views in analysis['review']['basic_review']['streaming'].items():\n", + " print(f\"- {platform} - {views} million views\")\n", + "print(\"\\nPros:\")\n", + "for pro in analysis['review'][\"basic_review\"][\"pros\"]:\n", + " print(f\"- {pro}\")\n", + "\n", + "print(\"\\nArtists:\")\n", + "for actor in analysis['review'][\"cast\"]:\n", + " print(f\"- {actor['name']} as {actor['role']}\")\n", + "\n", + "if analysis['review']['genre']:\n",
print(\"\\nGenere: \")\n", + " for genre in analysis['review']['genre']:\n", + " print(f\" {genre} \")\n", + "\n", + "if analysis['duration']:\n", + " print(f\"\\nDuration: {analysis['duration']} minutes\")\n", + "\n", + "if hasattr(analysis, 'awards') and analysis['awards']:\n", + " print(\"\\nAwards:\")\n", + " for category, count in analysis['awards'].items():\n", + " print(f\"- {category}: {count}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TODOs:\n", + "1. Add `JsonOutputParser` and `YamlOutputParser` to this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BLAF5qTEmoyW" + }, + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "nqe-vxB1BCux", + "NGE70aZ8BLuf" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "my-project-kernel", + "language": "python", + "name": "my-project-kernel" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/tutorials/adalflow_modelclient.ipynb b/notebooks/tutorials/adalflow_modelclient.ipynb new file mode 100644 index 00000000..1674c69a --- /dev/null +++ b/notebooks/tutorials/adalflow_modelclient.ipynb @@ -0,0 +1,2063 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "hGLYrUwBmvUD" + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gHK6HFngl6iP" + }, + "source": [ + "# πŸ€— Welcome to AdalFlow!\n", + "## The PyTorch library to auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! ⭐ Star us on Github ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "# Author\n", + "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder/).\n", + "\n", + "# Outline\n", + "\n", + "This is a quick introduction of what AdalFlow is capable of. We will cover:\n", + "\n", + "* How to use model client in sync and async calls\n", + "* How to do develop custom model client using adalflow\n", + "\n", + "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", + "\n", + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. 
We will need `openai`, `groq`, and `faiss` (CPU version) from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq,faiss-cpu]\n", + " ```\n", + "2. Set up the `openai` and `groq` API keys in the environment variables" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nqe-vxB1BCux" + }, + "source": [ + "### Install adalflow" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "ZaaevxNH9JMQ" + }, + "outputs": [], + "source": [ + "# Install adalflow with necessary dependencies\n", + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NGE70aZ8BLuf" + }, + "source": [ + "### Set Environment Variables\n", + "\n", + "Note: Enter your API keys in the cell below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j2xmGr_99YDq", + "outputId": "5f4ef3fe-3c20-481b-e4f6-93c14af1fd32" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing .env\n" + ] + } + ], + "source": [ + "%%writefile .env\n", + "\n", + "OPENAI_API_KEY=\"PASTE-OPENAI_API_KEY_HERE\"\n", + "GROQ_API_KEY=\"PASTE-GROQ_API_KEY-HERE\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "etSUh9KNjmdy" + }, + "outputs": [], + "source": [ + "from adalflow.utils import setup_env\n", + "\n", + "# Load environment variables - make sure OPENAI_API_KEY is set in the .env file and .env is in the current folder\n", + "setup_env(\".env\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZxBkm77uBZpl" + }, + "source": [ + "### Basic Vanilla Usage Example - model_client() - LLM Chat" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "wOAiKg899Z2u" + }, + "outputs": [], + "source": [ + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.core.types import ModelType" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jv5124_27ioY", + "outputId": "8e593b49-4705-49c0-9501-58cee43831d1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'User: What is the capital of France?\\n'}]}\n", + "response_text: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=16, total_tokens=23), raw_response='The capital of France is Paris.', metadata=None)\n" + ] + } + ], + "source": [ + "# Initialize the OpenAI client for API interactions\n", + "openai_client = OpenAIClient()\n", + "query = \"What is the capital of France?\"\n", + "\n", + "# Set the model type to Large Language Model (LLM)\n", + "model_type = ModelType.LLM\n", + "\n", + "# Construct the prompt by formatting the user's query\n", + "prompt = f\"User: {query}\\n\"\n", + "\n", + "# Configure model parameters:\n", + "# - model: Specifies GPT-3.5-turbo as the model to use\n", + "# - temperature: Controls randomness (0.5 = balanced between deterministic and creative)\n", + "# - max_tokens: Limits the response length to 100 tokens\n", + "model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.5, \"max_tokens\": 100}\n", + "\n", + "# Convert the inputs
into the format required by OpenAI's API\n", + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=prompt, model_kwargs=model_kwargs, model_type=model_type\n", + ")\n", + "print(f\"api_kwargs: {api_kwargs}\")\n", + "\n", + "\n", + "response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", + "\n", + "# Extract the text from the chat completion response\n", + "response_text = openai_client.parse_chat_completion(response)\n", + "print(f\"response_text: {response_text}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MBW5viOG9hM8" + }, + "source": [ + "### Basic Vanilla Usage Example - model_client() - Embedding" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YA4pAIek9ewc", + "outputId": "442d9708-966d-498a-de06-6a4833ba93ac" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the capital of France?', 'What is the capital of France?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.63402575, 0.24025092, 0.42818537, 0.37026355, -0.3518905, -0.041650757, -0.21627253, 0.21798527], index=0), Embedding(embedding=[0.63402575, 0.24025092, 0.42818537, 0.37026355, -0.3518905, -0.041650757, -0.21627253, 0.21798527], index=1)], model='text-embedding-3-small', usage=Usage(prompt_tokens=14, total_tokens=14), error=None, raw_response=None, input=None)\n" + ] + } + ], + "source": [ + "openai_client = OpenAIClient()\n", + "query = \"What is the capital of France?\"\n", + "\n", + "# Specify the model type to be used, setting it to EMBEDDER for embedding functionality\n", + "model_type = ModelType.EMBEDDER\n", + "\n", + "# Create a batch of inputs by duplicating the query; useful for testing batch embedding capabilities\n", + "input = [query] * 2\n", + "\n", + "# Set the model's parameters:\n", + "# - \"text-embedding-3-small\" is the model being used\n", + "# - \"dimensions\" defines the dimensionality of the embeddings\n", + "# - \"encoding_format\" specifies the data format for the embeddings\n", + "model_kwargs = {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 8,\n", + " \"encoding_format\": \"float\",\n", + "}\n", + "\n", + "# Convert the inputs and model parameters to the format expected by the API using OpenAI client's helper method\n", + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=input, model_kwargs=model_kwargs, model_type=model_type\n", + ")\n", + "print(f\"api_kwargs: {api_kwargs}\")\n", + "\n", + "response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", + "\n", + "# Parse the embedding response to extract the embeddings in a usable format\n", + "reponse_embedder_output = openai_client.parse_embedding_response(response)\n", + "print(f\"reponse_embedder_output: {reponse_embedder_output}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k42xTszF34Lx" + }, + "source": [ + "### Adalflow - model_client() - **OpenAI model** Embedding Usage (ModelType.EMBEDDER)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "id": "NPT6NmaKiKJ9" + }, + "outputs": [], + "source": [ + "from typing import List\n", + "import numpy as np\n", + "from adalflow.core.types import ModelType, EmbedderOutput\n", + "from adalflow.components.model_client import OpenAIClient\n", + "from
dataclasses import dataclass\n", + "from enum import Enum\n", + "from numpy.linalg import norm" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "id": "Uwtb2sejiN6z" + }, + "outputs": [], + "source": [ + "@dataclass\n", + "class EmbeddingCollection:\n", + " collection: List[float]\n", + " cindex: int\n", + "\n", + "\n", + "@dataclass\n", + "class Usage:\n", + " prompt_tokens: int\n", + " total_tokens: int" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "id": "qyzVv9qviUB1" + }, + "outputs": [], + "source": [ + "openai_client = OpenAIClient()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "id": "ufpyMmmZicVe" + }, + "outputs": [], + "source": [ + "def get_openai_embedding(text):\n", + " # Set model type to EMBEDDER for embedding functionality\n", + " model_type = ModelType.EMBEDDER\n", + "\n", + " # Prepare input and model-specific parameters\n", + " input = text\n", + " model_kwargs = {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 8,\n", + " \"encoding_format\": \"float\",\n", + " }\n", + "\n", + " # Convert inputs to the required API format\n", + " api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=input, model_kwargs=model_kwargs, model_type=model_type\n", + " )\n", + " print(f\"api_kwargs: {api_kwargs}\") # Debug output to verify API arguments\n", + "\n", + " # Call OpenAI API and parse response for embeddings\n", + " response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", + " reponse_embedder_output = openai_client.parse_embedding_response(response)\n", + " print(\n", + " f\"reponse_embedder_output: {reponse_embedder_output}\"\n", + " ) # Debug output to verify embeddings\n", + " return reponse_embedder_output\n", + "\n", + "\n", + "def process_embeddings(embeddings_collection):\n", + " # Extract embedding data for each item in the collection\n", + " embeddingOutput = [emb.collection for emb in embeddings_collection]\n", + " embeddingDataList = [each_emb_out.data for each_emb_out in embeddingOutput]\n", + " embeddingList = [\n", + " each_item.embedding\n", + " for each_emb_data in embeddingDataList\n", + " for each_item in each_emb_data\n", + " ]\n", + "\n", + " # Convert to numpy array for easier manipulation and calculations\n", + " embeddings_array = np.array(embeddingList)\n", + "\n", + " def calculate_similarity(emb1, emb2):\n", + " # Compute cosine similarity between two embeddings\n", + " return np.dot(emb1, emb2) / (norm(emb1) * norm(emb2))\n", + "\n", + " def get_average_embedding(embeddings_list):\n", + " # Calculate the mean embedding across a list of embeddings\n", + " return np.mean(embeddings_list, axis=0)\n", + "\n", + " def find_nearest_neighbors(\n", + " query_index: int, embedding_list: List[List[float]], k: int = 5\n", + " ):\n", + " # Find top-k most similar embeddings to a query embedding, based on cosine similarity\n", + " query_embedding = embedding_list[query_index]\n", + " similarities = [\n", + " (i, calculate_similarity(query_embedding, emb))\n", + " for i, emb in enumerate(embedding_list)\n", + " if i != query_index\n", + " ]\n", + " return sorted(similarities, key=lambda x: x[1], reverse=True)[:k]\n", + "\n", + " # Return dictionary of functions and processed data for further use\n", + " return {\n", + " \"embeddings_array\": embeddings_array,\n", + " \"calculate_similarity\": calculate_similarity,\n", + " \"average_embedding\": get_average_embedding,\n", + " \"find_nearest_neighbors\": 
find_nearest_neighbors,\n", + " }\n", + "\n", + "\n", + "# Demonstrate embeddings usage with sample data\n", + "def demonstrate_embeddings_usage(sample_embeddings, input_text_list):\n", + " # Initialize processor and retrieve embeddings array\n", + " processor = process_embeddings(sample_embeddings)\n", + " embeddings = processor[\"embeddings_array\"]\n", + "\n", + " print(\"1. Analyzing Semantic Similarities:\")\n", + " print(\"-\" * 50)\n", + "\n", + " # Select a few random indices for similarity testing\n", + " num_indices = 5\n", + " assert len(input_text_list) == len(embeddings)\n", + " indices = np.random.choice(len(input_text_list), num_indices, replace=False)\n", + " selected_text = np.array(input_text_list)[indices]\n", + " selected_embeddings = np.array(embeddings)[indices]\n", + "\n", + " # Display selected texts and their embeddings\n", + " print(\"Selected indices:\", indices)\n", + " print(\"Selected elements from array1:\", selected_text)\n", + " print(\"Selected elements from array2:\", selected_embeddings)\n", + "\n", + " # Calculate similarity between each pair of selected texts\n", + " for i in range(len(selected_text)):\n", + " for j in range(i + 1, len(selected_text)):\n", + " similarity = processor[\"calculate_similarity\"](\n", + " selected_embeddings[i], selected_embeddings[j]\n", + " )\n", + " print(f\"\\nComparing:\\n'{selected_text[i]}' \\nwith:\\n'{selected_text[j]}'\")\n", + " print(f\"Similarity score: {similarity:.4f}\")\n", + "\n", + " print(\"\\n2. Finding Nearest Neighbors:\")\n", + " print(\"-\" * 50)\n", + "\n", + " # Find and display the 3 nearest neighbors for the first text\n", + " query_idx = 0\n", + " neighbors = processor[\"find_nearest_neighbors\"](query_idx, embeddings, k=3)\n", + " print(f\"\\nQuery text: '{input_text_list[query_idx]}'\")\n", + " print(\"\\nNearest neighbors:\")\n", + "\n", + " for idx, similarity in neighbors:\n", + " print(f\"- '{input_text_list[idx]}' (similarity: {similarity:.4f})\")\n", + "\n", + " print(\"\\n3. 
Using Average Embeddings:\")\n", + " print(\"-\" * 50)\n", + "\n", + " # Calculate and compare the average embedding for texts containing \"Paris\"\n", + " paris_indices = [i for i, text in enumerate(input_text_list) if \"Paris\" in text]\n", + " paris_embeddings = embeddings[paris_indices]\n", + " avg_paris_embedding = processor[\"average_embedding\"](paris_embeddings)\n", + "\n", + " print(\"\\nComparing average 'Paris' embedding with all texts:\")\n", + " for i, text in enumerate(input_text_list):\n", + " similarity = processor[\"calculate_similarity\"](\n", + " avg_paris_embedding, embeddings[i]\n", + " )\n", + " print(f\"- '{text}' (similarity: {similarity:.4f})\")" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "id": "EuvZkL2kifvF" + }, + "outputs": [], + "source": [ + "def run_model_client_embedding_usage():\n", + " # Define a set of sample texts to test embedding and similarity functionalities\n", + " sample_texts = [\n", + " \"What is the capital of France?\",\n", + " \"Paris is the capital of France.\",\n", + " \"What is the population of France?\",\n", + " \"How big is Paris?\",\n", + " \"What is the weather like in Paris?\",\n", + " ]\n", + "\n", + " # Duplicate each sample text to form an input list with repeated entries (for embedding testing)\n", + " input_text_list = [text for text in sample_texts for _ in range(2)]\n", + "\n", + " # Generate embeddings for each text in the input list, and store them in an EmbeddingCollection\n", + " embeddings_collection = [\n", + " EmbeddingCollection(collection=get_openai_embedding(text), cindex=i)\n", + " for i, text in enumerate(input_text_list)\n", + " ]\n", + " print(\n", + " embeddings_collection\n", + " ) # Debugging output to verify embeddings collection content\n", + "\n", + " # Demonstrate the usage of embeddings by analyzing similarities, finding neighbors, etc.\n", + " demonstrate_embeddings_usage(embeddings_collection, input_text_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M9EpJd_7izCf", + "outputId": "ed1d938c-da36-4d1d-8cdf-5449047af403" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the capital of France?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.63402575, 0.24025092, 0.42818537, 0.37026355, -0.3518905, -0.041650757, -0.21627253, 0.21798527], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the capital of France?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.63402575, 0.24025092, 0.42818537, 0.37026355, -0.3518905, -0.041650757, -0.21627253, 0.21798527], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['Paris is the capital of France.']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.32851914, 0.31952682, -0.22016178, -0.34650376, -0.31563017, 0.49667537, -0.3447053, 0.395362], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, 
total_tokens=7), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['Paris is the capital of France.']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.32851914, 0.31952682, -0.22016178, -0.34650376, -0.31563017, 0.49667537, -0.3447053, 0.395362], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the population of France?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.69818175, 0.33034775, 0.48146424, -0.041622937, -0.38599142, 0.13369127, -0.011028064, 0.05374008], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the population of France?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.69818175, 0.33034775, 0.48146424, -0.041622937, -0.38599142, 0.13369127, -0.011028064, 0.05374008], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['How big is Paris?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.13988405, -0.35962427, 0.14219283, 0.0681765, -0.51662034, -0.116185255, -0.44545603, -0.58941436], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=5, total_tokens=5), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['How big is Paris?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[0.13988405, -0.35962427, 0.14219283, 0.0681765, -0.51662034, -0.116185255, -0.44545603, -0.58941436], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=5, total_tokens=5), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the weather like in Paris?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[-0.16997108, -0.14322221, -0.6407088, -0.16881266, -0.08045719, -0.20030048, -0.021483332, -0.6815693], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=8, total_tokens=8), error=None, raw_response=None, input=None)\n", + "api_kwargs: {'model': 'text-embedding-3-small', 'dimensions': 8, 'encoding_format': 'float', 'input': ['What is the weather like in Paris?']}\n", + "reponse_embedder_output: EmbedderOutput(data=[Embedding(embedding=[-0.16997108, -0.14322221, -0.6407088, -0.16881266, -0.08045719, -0.20030048, -0.021483332, -0.6815693], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=8, total_tokens=8), error=None, raw_response=None, input=None)\n", + "[EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.63402575, 0.24025092, 0.42818537, 0.37026355, -0.3518905, -0.041650757, -0.21627253, 0.21798527], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None), cindex=0), 
EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.63402575, 0.24025092, 0.42818537, 0.37026355, -0.3518905, -0.041650757, -0.21627253, 0.21798527], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None), cindex=1), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.32851914, 0.31952682, -0.22016178, -0.34650376, -0.31563017, 0.49667537, -0.3447053, 0.395362], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None), cindex=2), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.32851914, 0.31952682, -0.22016178, -0.34650376, -0.31563017, 0.49667537, -0.3447053, 0.395362], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None), cindex=3), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.69818175, 0.33034775, 0.48146424, -0.041622937, -0.38599142, 0.13369127, -0.011028064, 0.05374008], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None), cindex=4), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.69818175, 0.33034775, 0.48146424, -0.041622937, -0.38599142, 0.13369127, -0.011028064, 0.05374008], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None), cindex=5), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.13988405, -0.35962427, 0.14219283, 0.0681765, -0.51662034, -0.116185255, -0.44545603, -0.58941436], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=5, total_tokens=5), error=None, raw_response=None, input=None), cindex=6), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[0.13988405, -0.35962427, 0.14219283, 0.0681765, -0.51662034, -0.116185255, -0.44545603, -0.58941436], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=5, total_tokens=5), error=None, raw_response=None, input=None), cindex=7), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[-0.16997108, -0.14322221, -0.6407088, -0.16881266, -0.08045719, -0.20030048, -0.021483332, -0.6815693], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=8, total_tokens=8), error=None, raw_response=None, input=None), cindex=8), EmbeddingCollection(collection=EmbedderOutput(data=[Embedding(embedding=[-0.16997108, -0.14322221, -0.6407088, -0.16881266, -0.08045719, -0.20030048, -0.021483332, -0.6815693], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=8, total_tokens=8), error=None, raw_response=None, input=None), cindex=9)]\n", + "1. Analyzing Semantic Similarities:\n", + "--------------------------------------------------\n", + "Selected indices: [5 6 4 2 0]\n", + "Selected elements from array1: ['What is the population of France?' 'How big is Paris?'\n", + " 'What is the population of France?' 
'Paris is the capital of France.'\n", + " 'What is the capital of France?']\n", + "Selected elements from array2: [[ 0.69818175 0.33034775 0.48146424 -0.04162294 -0.38599142 0.13369127\n", + " -0.01102806 0.05374008]\n", + " [ 0.13988405 -0.35962427 0.14219283 0.0681765 -0.51662034 -0.11618526\n", + " -0.44545603 -0.58941436]\n", + " [ 0.69818175 0.33034775 0.48146424 -0.04162294 -0.38599142 0.13369127\n", + " -0.01102806 0.05374008]\n", + " [ 0.32851914 0.31952682 -0.22016178 -0.34650376 -0.31563017 0.49667537\n", + " -0.3447053 0.395362 ]\n", + " [ 0.63402575 0.24025092 0.42818537 0.37026355 -0.3518905 -0.04165076\n", + " -0.21627253 0.21798527]]\n", + "\n", + "Comparing:\n", + "'What is the population of France?' \n", + "with:\n", + "'How big is Paris?'\n", + "Similarity score: 0.2016\n", + "\n", + "Comparing:\n", + "'What is the population of France?' \n", + "with:\n", + "'What is the population of France?'\n", + "Similarity score: 1.0000\n", + "\n", + "Comparing:\n", + "'What is the population of France?' \n", + "with:\n", + "'Paris is the capital of France.'\n", + "Similarity score: 0.4566\n", + "\n", + "Comparing:\n", + "'What is the population of France?' \n", + "with:\n", + "'What is the capital of France?'\n", + "Similarity score: 0.8571\n", + "\n", + "Comparing:\n", + "'How big is Paris?' \n", + "with:\n", + "'What is the population of France?'\n", + "Similarity score: 0.2016\n", + "\n", + "Comparing:\n", + "'How big is Paris?' \n", + "with:\n", + "'Paris is the capital of France.'\n", + "Similarity score: -0.0980\n", + "\n", + "Comparing:\n", + "'How big is Paris?' \n", + "with:\n", + "'What is the capital of France?'\n", + "Similarity score: 0.2429\n", + "\n", + "Comparing:\n", + "'What is the population of France?' \n", + "with:\n", + "'Paris is the capital of France.'\n", + "Similarity score: 0.4566\n", + "\n", + "Comparing:\n", + "'What is the population of France?' \n", + "with:\n", + "'What is the capital of France?'\n", + "Similarity score: 0.8571\n", + "\n", + "Comparing:\n", + "'Paris is the capital of France.' \n", + "with:\n", + "'What is the capital of France?'\n", + "Similarity score: 0.3136\n", + "\n", + "2. Finding Nearest Neighbors:\n", + "--------------------------------------------------\n", + "\n", + "Query text: 'What is the capital of France?'\n", + "\n", + "Nearest neighbors:\n", + "- 'What is the capital of France?' (similarity: 1.0000)\n", + "- 'What is the population of France?' (similarity: 0.8571)\n", + "- 'What is the population of France?' (similarity: 0.8571)\n", + "\n", + "3. Using Average Embeddings:\n", + "--------------------------------------------------\n", + "\n", + "Comparing average 'Paris' embedding with all texts:\n", + "- 'What is the capital of France?' (similarity: -0.0168)\n", + "- 'What is the capital of France?' (similarity: -0.0168)\n", + "- 'Paris is the capital of France.' (similarity: 0.3752)\n", + "- 'Paris is the capital of France.' (similarity: 0.3752)\n", + "- 'What is the population of France?' (similarity: 0.0897)\n", + "- 'What is the population of France?' (similarity: 0.0897)\n", + "- 'How big is Paris?' (similarity: 0.7366)\n", + "- 'How big is Paris?' (similarity: 0.7366)\n", + "- 'What is the weather like in Paris?' (similarity: 0.6574)\n", + "- 'What is the weather like in Paris?' 
(similarity: 0.6574)\n" + ] + } + ], + "source": [ + "run_model_client_embedding_usage()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kOKN7jZAkZz0" + }, + "source": [ + "### Adalflow - model_client() - **OpenAI model** LLM Multichat Usage (ModelType.LLM)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "VIQaK1dmkHiJ" + }, + "outputs": [], + "source": [ + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.core.types import ModelType\n", + "from adalflow.utils import setup_env\n", + "from typing import List, Dict" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "BRRfPHh4kHY9" + }, + "outputs": [], + "source": [ + "class ChatConversation:\n", + "    def __init__(self):\n", + "        # Initialize the OpenAI client for managing API calls\n", + "        self.openai_client = OpenAIClient()\n", + "        # Initialize an empty conversation history to store chat messages\n", + "        self.conversation_history: str = \"\"\n", + "        # Model parameters to customize the API call\n", + "        self.model_kwargs = {\n", + "            \"model\": \"gpt-3.5-turbo\",\n", + "            \"temperature\": 0.5,  # Controls randomness; 0.5 for balanced responses\n", + "            \"max_tokens\": 100,  # Limits the response length\n", + "        }\n", + "\n", + "    def add_user_message(self, message: str):\n", + "        \"\"\"Add a user message to the conversation history\"\"\"\n", + "        self.conversation_history += (\n", + "            f\" {message} \"  # Format for user message\n", + "        )\n", + "\n", + "    def add_assistant_message(self, message: str):\n", + "        \"\"\"Add an assistant message to the conversation history\"\"\"\n", + "        self.conversation_history += (\n", + "            f\" {message} \"  # Format for assistant message\n", + "        )\n", + "\n", + "    def get_response(self) -> str:\n", + "        \"\"\"Get response from the model based on conversation history\"\"\"\n", + "        # Convert the conversation history and model parameters into API arguments\n", + "        api_kwargs = self.openai_client.convert_inputs_to_api_kwargs(\n", + "            input=self.conversation_history,\n", + "            model_kwargs=self.model_kwargs,\n", + "            model_type=ModelType.LLM,\n", + "        )\n", + "        print(f\"api_kwargs: {api_kwargs}\")  # Debugging output to verify API parameters\n", + "\n", + "        # Call the API with the generated arguments to get a response\n", + "        response = self.openai_client.call(\n", + "            api_kwargs=api_kwargs, model_type=ModelType.LLM\n", + "        )\n", + "        print(\"response: \", response)  # Debugging output for raw API response\n", + "\n", + "        # Parse the completion into a GeneratorOutput; its raw_response field holds the reply text\n", + "        response_text = self.openai_client.parse_chat_completion(response)\n", + "        # Update conversation history with the assistant's response\n", + "        self.add_assistant_message(response_text)\n", + "        return response_text  # Return the assistant's response to the caller" + ] + },
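+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `parse_chat_completion` returns a `GeneratorOutput` object rather than a plain string, so `add_assistant_message` above stores the whole object, token usage and all, in the history string; the recorded output further down makes this visible. A minimal adjustment (an illustrative sketch, not a requirement of the library) keeps the history readable by appending only the reply text:\n", + "\n", + "```python\n", + "# Inside get_response(): append just the reply text, which lives in raw_response\n", + "response_text = self.openai_client.parse_chat_completion(response)\n", + "self.add_assistant_message(response_text.raw_response)\n", + "return response_text.raw_response\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "9HWtlBnZkHLU" + }, + "outputs": [], + "source": [ + "def check_chat_conversation():\n", + "    # Initialize a new chat conversation\n", + "    chat = ChatConversation()\n", + "\n", + "    # Example list of user questions to simulate a multi-turn conversation\n", + "    questions = [\n", + "        \"What is the capital of France?\",\n", + "        \"What is its population?\",\n", + "        \"Tell me about its famous landmarks\",\n", + "    ]\n", + "\n", + "    # Iterate through each question in the list\n", + "    for question in questions:\n", + "        print(f\"\\nUser: {question}\")  # Display the user's question\n", + "        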
chat.add_user_message(\n", + " question\n", + " ) # Add the user question to the conversation history\n", + "\n", + " response = (\n", + " chat.get_response()\n", + " ) # Get assistant's response based on conversation history\n", + " print(f\"Assistant: {response}\") # Display the assistant's response\n", + "\n", + " # Display the full conversation history after all exchanges\n", + " print(\"\\nFull Conversation History:\")\n", + " print(chat.conversation_history) # Print the accumulated conversation history" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mLFopVh0lJJh", + "outputId": "eb6d555e-1562-4c0c-ad94-57044a853eb4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "User: What is the capital of France?\n", + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': ' What is the capital of France? '}]}\n", + "response: ChatCompletion(id='chatcmpl-ASHotWDnw55BOd5d3zWzs0ucxztJr', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305047, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=20, total_tokens=27, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=20, total_tokens=27), raw_response='The capital of France is Paris.', metadata=None)\n", + "\n", + "User: What is its population?\n", + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': \" What is the capital of France? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=20, total_tokens=27), raw_response='The capital of France is Paris.', metadata=None) What is its population? 
\"}]}\n", + "response: ChatCompletion(id='chatcmpl-ASHouXidu63zZHiV9uFZ1rH5SFNYj', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The population of Paris, the capital of France, is approximately 2.2 million people.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305048, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=19, prompt_tokens=82, total_tokens=101, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=19, prompt_tokens=82, total_tokens=101), raw_response='The population of Paris, the capital of France, is approximately 2.2 million people.', metadata=None)\n", + "\n", + "User: Tell me about its famous landmarks\n", + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': \" What is the capital of France? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=20, total_tokens=27), raw_response='The capital of France is Paris.', metadata=None) What is its population? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=19, prompt_tokens=82, total_tokens=101), raw_response='The population of Paris, the capital of France, is approximately 2.2 million people.', metadata=None) Tell me about its famous landmarks \"}]}\n", + "response: ChatCompletion(id='chatcmpl-ASHovLb6YpzmwJ8Noc90GdeLpvvrW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Paris, the capital of France, is known for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, Louvre Museum, Champs-Γ‰lysΓ©es, and Arc de Triomphe. These landmarks attract millions of tourists from around the world each year and are symbols of the city's rich history and cultural heritage.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305049, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=69, prompt_tokens=157, total_tokens=226, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=69, prompt_tokens=157, total_tokens=226), raw_response=\"Paris, the capital of France, is known for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, Louvre Museum, Champs-Γ‰lysΓ©es, and Arc de Triomphe. These landmarks attract millions of tourists from around the world each year and are symbols of the city's rich history and cultural heritage.\", metadata=None)\n", + "\n", + "Full Conversation History:\n", + " What is the capital of France? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=20, total_tokens=27), raw_response='The capital of France is Paris.', metadata=None) What is its population? 
GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=19, prompt_tokens=82, total_tokens=101), raw_response='The population of Paris, the capital of France, is approximately 2.2 million people.', metadata=None) Tell me about its famous landmarks GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=69, prompt_tokens=157, total_tokens=226), raw_response=\"Paris, the capital of France, is known for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, Louvre Museum, Champs-Γ‰lysΓ©es, and Arc de Triomphe. These landmarks attract millions of tourists from around the world each year and are symbols of the city's rich history and cultural heritage.\", metadata=None) \n" + ] + } + ], + "source": [ + "# Note how each question depends on the previous one and the chat answers with the accumulated context\n", + "check_chat_conversation()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BhD8AJWyldK-" + }, + "source": [ + "### Adalflow - model_client() - **OpenAI model** LLM Multichat Usage (ModelType.LLM) - asynchronous (async())" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "AUjjiCnulcF8" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.core.types import ModelType\n", + "from typing import List" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "Z5i9_q3vlo3C" + }, + "outputs": [], + "source": [ + "class ChatConversationAsync:\n", + "    def __init__(self):\n", + "        # Initialize with an asynchronous OpenAI client\n", + "        self.openai_client = OpenAIClient()\n", + "\n", + "        # Default model parameters for the chat\n", + "        self.model_kwargs = {\n", + "            \"model\": \"gpt-3.5-turbo\",  # Model used for chat\n", + "            \"temperature\": 0.5,  # Controls randomness in response\n", + "            \"max_tokens\": 100,  # Maximum tokens in the generated response\n", + "        }\n", + "\n", + "    async def get_response(self, message: str) -> str:\n", + "        \"\"\"Asynchronously get a response from the model for a given user message\"\"\"\n", + "\n", + "        # Convert input message and model parameters into the format expected by the API\n", + "        api_kwargs = self.openai_client.convert_inputs_to_api_kwargs(\n", + "            input=message,  # User's message input\n", + "            model_kwargs=self.model_kwargs,  # Model-specific settings\n", + "            model_type=ModelType.LLM,  # Specify the model type as a language model (LLM)\n", + "        )\n", + "        print(f\"api_kwargs: {api_kwargs}\")  # Log the API arguments for debugging\n", + "\n", + "        # Make an asynchronous API call to OpenAI's model\n", + "        response = await self.openai_client.acall(\n", + "            api_kwargs=api_kwargs,  # Pass the prepared arguments\n", + "            model_type=ModelType.LLM,  # Specify the model type again\n", + "        )\n", + "        print(\"response: \", response)  # Print the raw response from the API\n", + "\n", + "        # Parse the API response to extract the assistant's reply (chat completion)\n", + "        response_text = self.openai_client.parse_chat_completion(response)\n", + "        return response_text  # Return the parsed response text" + ] + },
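+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next cell fans several independent questions out with `asyncio.gather`, which runs the `acall` coroutines concurrently. One caveat worth knowing (standard `asyncio` behavior, not AdalFlow-specific): by default `gather` re-raises the first exception and you lose the other results, so a fault-tolerant variant of the same call could collect failures instead, as in this sketch:\n", + "\n", + "```python\n", + "# Sketch: collect per-task exceptions instead of failing the whole batch\n", + "responses = await asyncio.gather(*tasks, return_exceptions=True)\n", + "for question, result in zip(questions, responses):\n", + "    if isinstance(result, Exception):\n", + "        print(f\"'{question}' failed: {result}\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "nNVmwsO2lrWX" + }, + "outputs": [], + "source": [ + "async def check_chat_conversations_async():\n", + "    # Create an instance of ChatConversationAsync to handle asynchronous operations\n", + "    chat = ChatConversationAsync()\n", + "\n", + "    # List of unrelated 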
questions that will be handled in parallel\n", + " questions = [\n", + " \"What is the capital of France?\", # Question 1\n", + " \"Is dog a wild animal?\", # Question 2\n", + " \"Tell me about amazon forest\", # Question 3\n", + " ]\n", + "\n", + " # Create a list of asynchronous tasks, one for each question\n", + " # Each task calls the get_response method asynchronously for a question\n", + " tasks = [chat.get_response(question) for question in questions]\n", + "\n", + " # Gather the results of all asynchronous tasks concurrently\n", + " responses = await asyncio.gather(*tasks)\n", + "\n", + " # Print the responses from the assistant along with the respective user questions\n", + " for question, response in zip(questions, responses):\n", + " print(f\"\\nUser: {question}\")\n", + " print(f\"Assistant: {response}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T8hdUjjeltVo", + "outputId": "53dc0385-afb6-4268-c3cc-dde9385b565e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'What is the capital of France?'}]}\n", + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'Is dog a wild animal?'}]}\n", + "api_kwargs: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'Tell me about amazon forest'}]}\n", + "response: ChatCompletion(id='chatcmpl-ASHqEOWoBOIiulzd0aoXeyKKb9npb', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305130, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=14, total_tokens=21, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "response: ChatCompletion(id='chatcmpl-ASHqE6WAmS4wnRMdD20PdjsdyOcuP', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Dogs were originally domesticated from wolves thousands of years ago, and they are now considered domestic animals. While they share some characteristics with wild animals, such as hunting instincts and pack behavior, dogs have been bred and trained by humans for companionship and various roles, such as working dogs, service animals, and pets. 
So, in general, dogs are not considered wild animals.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305130, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=76, prompt_tokens=13, total_tokens=89, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "response: ChatCompletion(id='chatcmpl-ASHqEexoH4z6uZsDkoRwwmyFuoSyZ', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='The Amazon Rainforest, also known as the Amazon Jungle, is a vast and dense tropical rainforest that covers much of the Amazon Basin in South America. It is the largest rainforest in the world, spanning over 5.5 million square kilometers (2.1 million square miles) across nine countries, including Brazil, Peru, Colombia, and Venezuela.\\n\\nThe Amazon Rainforest is home to an incredibly diverse array of plant and animal species, many of which are found nowhere else on Earth. It', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305130, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=12, total_tokens=112, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "\n", + "User: What is the capital of France?\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=14, total_tokens=21), raw_response='The capital of France is Paris.', metadata=None)\n", + "\n", + "User: Is dog a wild animal?\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=76, prompt_tokens=13, total_tokens=89), raw_response='Dogs were originally domesticated from wolves thousands of years ago, and they are now considered domestic animals. While they share some characteristics with wild animals, such as hunting instincts and pack behavior, dogs have been bred and trained by humans for companionship and various roles, such as working dogs, service animals, and pets. So, in general, dogs are not considered wild animals.', metadata=None)\n", + "\n", + "User: Tell me about amazon forest\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=12, total_tokens=112), raw_response='The Amazon Rainforest, also known as the Amazon Jungle, is a vast and dense tropical rainforest that covers much of the Amazon Basin in South America. It is the largest rainforest in the world, spanning over 5.5 million square kilometers (2.1 million square miles) across nine countries, including Brazil, Peru, Colombia, and Venezuela.\\n\\nThe Amazon Rainforest is home to an incredibly diverse array of plant and animal species, many of which are found nowhere else on Earth. 
It', metadata=None)\n" + ] + } + ], + "source": [ + "# When running as a standalone script, start the event loop explicitly:\n", + "# asyncio.run(check_chat_conversations_async())\n", + "\n", + "# In a Jupyter notebook, which already runs an event loop, await it directly\n", + "await check_chat_conversations_async()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VvhvKmimfIt" + }, + "source": [ + "### Adalflow - model_client() - **OpenAI model** LLM Multichat Usage (ModelType.LLM) - Benchmark sync() vs async()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "tMT0BsaRmt6z" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "import time\n", + "from adalflow.components.model_client import (\n", + "    OpenAIClient,\n", + ")  # Assuming OpenAIClient with .call() and .acall() is available\n", + "from adalflow.core.types import ModelType" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "QrzYgdsEm9sz" + }, + "outputs": [], + "source": [ + "# Initialize the OpenAI client\n", + "openai_client = OpenAIClient()\n", + "\n", + "# Sample prompt for testing\n", + "prompt = \"Tell me a joke.\"\n", + "\n", + "model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.5, \"max_tokens\": 100}" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "amdQNSmvnB-L" + }, + "outputs": [], + "source": [ + "# Synchronous function for benchmarking .call()\n", + "def benchmark_sync_call(api_kwargs, runs=10):\n", + "    \"\"\"\n", + "    Benchmark the synchronous .call() method by running it multiple times.\n", + "\n", + "    Parameters:\n", + "    - api_kwargs: The arguments to be passed to the API call\n", + "    - runs: The number of times to run the call (default is 10)\n", + "    \"\"\"\n", + "    # Record the start time of the benchmark\n", + "    start_time = time.time()\n", + "\n", + "    # Perform synchronous API calls for the specified number of runs\n", + "    responses = [\n", + "        openai_client.call(\n", + "            api_kwargs=api_kwargs,  # API arguments\n", + "            model_type=ModelType.LLM,  # Model type (e.g., LLM for language models)\n", + "        )\n", + "        for _ in range(runs)  # Repeat 'runs' times\n", + "    ]\n", + "\n", + "    # Record the end time after all calls are completed\n", + "    end_time = time.time()\n", + "\n", + "    # Output the results of each synchronous call\n", + "    for i, response in enumerate(responses):\n", + "        print(f\"sync call {i + 1} completed: {response}\")\n", + "\n", + "    # Print the total time taken for all synchronous calls\n", + "    print(f\"\\nSynchronous benchmark completed in {end_time - start_time:.2f} seconds\")\n", + "\n", + "\n", + "# Asynchronous function for benchmarking .acall()\n", + "async def benchmark_async_acall(api_kwargs, runs=10):\n", + "    \"\"\"\n", + "    Benchmark the asynchronous .acall() method by running it multiple times concurrently.\n", + "\n", + "    Parameters:\n", + "    - api_kwargs: The arguments to be passed to the API call\n", + "    - runs: The number of times to run the asynchronous call (default is 10)\n", + "    \"\"\"\n", + "    # Record the start time of the benchmark\n", + "    start_time = time.time()\n", + "\n", + "    # Create a list of asynchronous tasks for the specified number of runs\n", + "    tasks = [\n", + "        openai_client.acall(\n", + "            api_kwargs=api_kwargs,  # API arguments\n", + "            model_type=ModelType.LLM,  # Model type (e.g., LLM for language models)\n", + "        )\n", + "        for _ in range(runs)  # Repeat 'runs' times\n", + "    ]\n", + "\n", + "    # Execute all tasks concurrently and wait for them to 
finish\n", + " responses = await asyncio.gather(*tasks)\n", + "\n", + " # Record the end time after all tasks are completed\n", + " end_time = time.time()\n", + "\n", + " # Output the results of each asynchronous call\n", + " for i, response in enumerate(responses):\n", + " print(f\"Async call {i + 1} completed: {response}\")\n", + "\n", + " # Print the total time taken for all asynchronous calls\n", + " print(f\"\\nAsynchronous benchmark completed in {end_time - start_time:.2f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FNmiGG_bnD7Q", + "outputId": "242071e3-18e1-44aa-c99a-8fac71fbb84c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting synchronous benchmark...\n", + "\n", + "sync call 1 completed: ChatCompletion(id='chatcmpl-ASHqYcxCVNAnLlsrnRvxh5cRrQOwf', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself? Because it was two-tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305150, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 2 completed: ChatCompletion(id='chatcmpl-ASHqZz3G3jqGlHtKjoO9mbYjjS1Af', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305151, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 3 completed: ChatCompletion(id='chatcmpl-ASHqZJmWUUDSrVElavHZOCuvSNQ8q', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305151, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 4 completed: ChatCompletion(id='chatcmpl-ASHqdMDEfY4pVAAom6RbmvnsBYdc1', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself? 
Because it was two-tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305155, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 5 completed: ChatCompletion(id='chatcmpl-ASHqdrGYZofAuDFGyAPq7mPsub78v', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why don't scientists trust atoms?\\n\\nBecause they make up everything!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305155, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=13, prompt_tokens=12, total_tokens=25, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 6 completed: ChatCompletion(id='chatcmpl-ASHqerqL9a6ev9YRNaSzy3mskQOmY', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305156, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 7 completed: ChatCompletion(id='chatcmpl-ASHqeQq3iPrHepIld9SSg2Pzsxc4N', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself? Because it was two tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305156, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=12, total_tokens=28, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 8 completed: ChatCompletion(id='chatcmpl-ASHqfD6yeMEwZ6StOT8Ncd00R3No1', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself? 
Because it was two tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305157, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=12, total_tokens=28, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 9 completed: ChatCompletion(id='chatcmpl-ASHqgl57WJILsEu4PUj59MayzYnZr', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why don't scientists trust atoms? \\n\\nBecause they make up everything!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305158, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=14, prompt_tokens=12, total_tokens=26, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "sync call 10 completed: ChatCompletion(id='chatcmpl-ASHqgoVKX9legJ43xcSkLR4kgRxTn', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why was the math book sad?\\n\\nBecause it had too many problems.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305158, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=14, prompt_tokens=12, total_tokens=26, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "\n", + "Synchronous benchmark completed in 8.77 seconds\n", + "\n", + "Starting asynchronous benchmark...\n", + "\n", + "Async call 1 completed: ChatCompletion(id='chatcmpl-ASHqhKMKo8PMbdyiYpNHBQW4oU3J7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself? 
Because it was two tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=12, total_tokens=28, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 2 completed: ChatCompletion(id='chatcmpl-ASHqhWwUpBXRbgKstjV6qei7FzgfG', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why don't scientists trust atoms?\\n\\nBecause they make up everything!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=13, prompt_tokens=12, total_tokens=25, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 3 completed: ChatCompletion(id='chatcmpl-ASHqhgTqUmUh5FW2nwTyRLagoKMQ5', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 4 completed: ChatCompletion(id='chatcmpl-ASHqhYzaxguL4P2MDG1AakTiGMIyg', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 5 completed: ChatCompletion(id='chatcmpl-ASHqhsdbfpywUP4KBhqPvUNOcOm1x', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? 
Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 6 completed: ChatCompletion(id='chatcmpl-ASHqhqdaOKxe5zjf4vpKZAFbH8x5n', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself? Because it was two tired!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=12, total_tokens=28, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 7 completed: ChatCompletion(id='chatcmpl-ASHqhrXadr2Tf62QM4SAXjLg8iSql', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the golfer bring two pairs of pants? In case he got a hole in one!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=20, prompt_tokens=12, total_tokens=32, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 8 completed: ChatCompletion(id='chatcmpl-ASHqh8mQiGew9qwCOY5UgUilx2SYL', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? 
Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 9 completed: ChatCompletion(id='chatcmpl-ASHqhNWUNOnSj9LLE7utDW0wz7USX', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why don't scientists trust atoms?\\n\\nBecause they make up everything!\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=13, prompt_tokens=12, total_tokens=25, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "Async call 10 completed: ChatCompletion(id='chatcmpl-ASHqhX5u0K2xFoFxyhebnOI9WsT0l', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the scarecrow win an award? Because he was outstanding in his field!', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305159, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "\n", + "Asynchronous benchmark completed in 0.75 seconds\n" + ] + } + ], + "source": [ + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + "    input=prompt, model_kwargs=model_kwargs, model_type=ModelType.LLM\n", + ")\n", + "\n", + "# Run both benchmarks\n", + "print(\"Starting synchronous benchmark...\\n\")\n", + "benchmark_sync_call(api_kwargs)\n", + "\n", + "print(\"\\nStarting asynchronous benchmark...\\n\")\n", + "await benchmark_async_acall(api_kwargs)" + ] + },
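+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The synchronous loop above issues one request at a time, while the asynchronous version overlaps all ten network round-trips, which is why the recorded timings differ by roughly an order of magnitude (8.77s vs. 0.75s). When scaling the async pattern to many more calls, a common refinement (a general asyncio technique, not something the benchmark above requires) is to cap concurrency so you stay within provider rate limits:\n", + "\n", + "```python\n", + "async def bounded_acall(sem: asyncio.Semaphore, api_kwargs):\n", + "    # Only `limit` coroutines may hold the semaphore at once\n", + "    async with sem:\n", + "        return await openai_client.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)\n", + "\n", + "\n", + "async def benchmark_bounded_acall(api_kwargs, runs=10, limit=5):\n", + "    sem = asyncio.Semaphore(limit)\n", + "    return await asyncio.gather(*(bounded_acall(sem, api_kwargs) for _ in range(runs)))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QtbUd2K-nPaL" + }, + "source": [ + "### Adalflow - model_client() - **OpenAI model** LLM Multichat Usage (ModelType.LLM) - Additional Utils\n", + "- get_first_message_content()\n", + "- get_all_messages_content()\n", + "- get_probabilities()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "ghyzD7tynO4A" + }, + "outputs": [], + "source": [ + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.core.types import ModelType\n", + "from adalflow.utils import setup_env\n", + "from adalflow.components.model_client.openai_client import (\n", + "    get_first_message_content,\n", + "    get_all_messages_content,\n", + "    get_probabilities,\n", + ")\n", + "from adalflow.core import Generator" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "QAaOFTZVn4Yx" + }, + "outputs": [], + "source": [ + "def 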
check_openai_additional_utils(func, model_kwargs):\n", + "    \"\"\"\n", + "    Demonstrates using the OpenAI client with a custom chat-completion parser\n", + "    to control how the LLM response for a given query is parsed.\n", + "\n", + "    Parameters:\n", + "    - func: A function used to parse the chat completion (custom parsing).\n", + "    - model_kwargs: Additional model parameters (e.g., temperature, max_tokens).\n", + "\n", + "    Returns:\n", + "    - output: The generated response from the model based on the query.\n", + "    \"\"\"\n", + "\n", + "    # Initialize the OpenAI client with a custom chat completion parser\n", + "    openai_client = OpenAIClient(chat_completion_parser=func)\n", + "\n", + "    # Define a sample query (user question)\n", + "    query = \"What is the capital of France?\"\n", + "\n", + "    # The query reaches the model through prompt_kwargs; input_str fills the prompt template\n", + "    prompt_kwargs = {\n", + "        \"input_str\": query,\n", + "    }\n", + "\n", + "    # Initialize the Generator with the OpenAI client and model parameters\n", + "    generator = Generator(model_client=openai_client, model_kwargs=model_kwargs)\n", + "\n", + "    # Execute the generator to get a response for the prompt (using the defined prompt_kwargs)\n", + "    output = generator(prompt_kwargs=prompt_kwargs)\n", + "\n", + "    # Return the generated output (response from the LLM)\n", + "    return output" + ] + },
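+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The key detail above is the `chat_completion_parser` argument: whatever the parser returns becomes the `data` (and `raw_response`) of the resulting `GeneratorOutput`. A compact, self-contained sketch of the same idea (it assumes `OPENAI_API_KEY` is set, as in the earlier cells):\n", + "\n", + "```python\n", + "from adalflow.components.model_client import OpenAIClient\n", + "from adalflow.components.model_client.openai_client import get_first_message_content\n", + "from adalflow.core import Generator\n", + "\n", + "# The parser extracts just the first message's text from the raw ChatCompletion\n", + "client = OpenAIClient(chat_completion_parser=get_first_message_content)\n", + "generator = Generator(model_client=client, model_kwargs={\"model\": \"gpt-3.5-turbo\"})\n", + "output = generator(prompt_kwargs={\"input_str\": \"What is the capital of France?\"})\n", + "print(output.data)  # e.g., 'The capital of France is Paris.'\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "hNnBpFjkoXil" + }, + "outputs": [], + "source": [ + "def run_utils_functions():\n", + "    \"\"\"\n", + "    This function runs a series of utility functions using different model\n", + "    configurations for generating responses. 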
It demonstrates how to check\n", + " OpenAI model outputs using various utility functions.\n", + " \"\"\"\n", + "\n", + " # Define the model arguments for the probability-based function (with logprobs)\n", + " probability_model_kwargs = {\n", + " \"model\": \"gpt-3.5-turbo\", # Specify the model version\n", + " \"logprobs\": True, # Enable logprobs to get probability distributions for tokens\n", + " \"n\": 2, # Request 2 different completions for each query\n", + " }\n", + "\n", + " # Define general model arguments for most other functions\n", + " model_kwargs = {\n", + " \"model\": \"gpt-3.5-turbo\", # Specify the model version\n", + " \"temperature\": 0.5, # Control the randomness of responses (0 is deterministic)\n", + " \"max_tokens\": 100, # Set the maximum number of tokens (words) in the response\n", + " }\n", + "\n", + " # List of functions to run with corresponding model arguments\n", + " func_list = [\n", + " [\n", + " get_probabilities,\n", + " probability_model_kwargs,\n", + " ], # Function to get probabilities with specific kwargs\n", + " [\n", + " get_first_message_content,\n", + " model_kwargs,\n", + " ], # Function to get first message content\n", + " [\n", + " get_all_messages_content,\n", + " model_kwargs,\n", + " ], # Function to get all messages content in multi-chat scenarios\n", + " ]\n", + "\n", + " # Loop through each function and its corresponding arguments\n", + " for each_func in func_list:\n", + " # Check the function output using the specified arguments\n", + " result = check_openai_additional_utils(each_func[0], each_func[1])\n", + "\n", + " # Print the function and result for debugging purposes\n", + " print(f\"Function: {each_func[0].__name__}, Model Args: {each_func[1]}\")\n", + " print(f\"Result: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mU6kFzslo6qr", + "outputId": "29e6b00e-99d3-4189-d161-3c79806fd19d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-7.076218e-05, top_logprobs=[]), ChatCompletionTokenLogprob(token=' capital', bytes=[32, 99, 97, 112, 105, 116, 97, 108], logprob=-1.9361265e-07, top_logprobs=[]), ChatCompletionTokenLogprob(token=' of', bytes=[32, 111, 102], logprob=-0.00020163313, top_logprobs=[]), ChatCompletionTokenLogprob(token=' France', bytes=[32, 70, 114, 97, 110, 99, 101], logprob=-1.2664457e-06, top_logprobs=[]), ChatCompletionTokenLogprob(token=' is', bytes=[32, 105, 115], logprob=-6.704273e-07, top_logprobs=[]), ChatCompletionTokenLogprob(token=' Paris', bytes=[32, 80, 97, 114, 105, 115], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token='.', bytes=[46], logprob=-2.1769476e-05, top_logprobs=[])]\n", + "[ChatCompletionTokenLogprob(token='The', bytes=[84, 104, 101], logprob=-7.076218e-05, top_logprobs=[]), ChatCompletionTokenLogprob(token=' capital', bytes=[32, 99, 97, 112, 105, 116, 97, 108], logprob=-1.9361265e-07, top_logprobs=[]), ChatCompletionTokenLogprob(token=' of', bytes=[32, 111, 102], logprob=-0.00020163313, top_logprobs=[]), ChatCompletionTokenLogprob(token=' France', bytes=[32, 70, 114, 97, 110, 99, 101], logprob=-1.2664457e-06, top_logprobs=[]), ChatCompletionTokenLogprob(token=' is', bytes=[32, 105, 115], logprob=-6.704273e-07, top_logprobs=[]), ChatCompletionTokenLogprob(token=' Paris', bytes=[32, 80, 97, 114, 105, 115], logprob=0.0, top_logprobs=[]), 
ChatCompletionTokenLogprob(token='.', bytes=[46], logprob=-2.1769476e-05, top_logprobs=[])]\n", + "Function: get_probabilities, Model Args: {'model': 'gpt-3.5-turbo', 'logprobs': True, 'n': 2}\n", + "Result: GeneratorOutput(id=None, data=[[TokenLogProb(token='The', logprob=-7.076218e-05), TokenLogProb(token=' capital', logprob=-1.9361265e-07), TokenLogProb(token=' of', logprob=-0.00020163313), TokenLogProb(token=' France', logprob=-1.2664457e-06), TokenLogProb(token=' is', logprob=-6.704273e-07), TokenLogProb(token=' Paris', logprob=0.0), TokenLogProb(token='.', logprob=-2.1769476e-05)], [TokenLogProb(token='The', logprob=-7.076218e-05), TokenLogProb(token=' capital', logprob=-1.9361265e-07), TokenLogProb(token=' of', logprob=-0.00020163313), TokenLogProb(token=' France', logprob=-1.2664457e-06), TokenLogProb(token=' is', logprob=-6.704273e-07), TokenLogProb(token=' Paris', logprob=0.0), TokenLogProb(token='.', logprob=-2.1769476e-05)]], error=None, usage=CompletionUsage(completion_tokens=14, prompt_tokens=48, total_tokens=62), raw_response=[[TokenLogProb(token='The', logprob=-7.076218e-05), TokenLogProb(token=' capital', logprob=-1.9361265e-07), TokenLogProb(token=' of', logprob=-0.00020163313), TokenLogProb(token=' France', logprob=-1.2664457e-06), TokenLogProb(token=' is', logprob=-6.704273e-07), TokenLogProb(token=' Paris', logprob=0.0), TokenLogProb(token='.', logprob=-2.1769476e-05)], [TokenLogProb(token='The', logprob=-7.076218e-05), TokenLogProb(token=' capital', logprob=-1.9361265e-07), TokenLogProb(token=' of', logprob=-0.00020163313), TokenLogProb(token=' France', logprob=-1.2664457e-06), TokenLogProb(token=' is', logprob=-6.704273e-07), TokenLogProb(token=' Paris', logprob=0.0), TokenLogProb(token='.', logprob=-2.1769476e-05)]], metadata=None)\n", + "Function: get_first_message_content, Model Args: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100}\n", + "Result: GeneratorOutput(id=None, data='The capital of France is Paris.', error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=48, total_tokens=55), raw_response='The capital of France is Paris.', metadata=None)\n", + "Function: get_all_messages_content, Model Args: {'model': 'gpt-3.5-turbo', 'temperature': 0.5, 'max_tokens': 100}\n", + "Result: GeneratorOutput(id=None, data=['The capital of France is Paris.'], error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=48, total_tokens=55), raw_response=['The capital of France is Paris.'], metadata=None)\n" + ] + } + ], + "source": [ + "run_utils_functions()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mkvEEtXLrDZm" + }, + "source": [ + "### Adalflow - model_client() - **Groq model** LLM Multichat Usage (ModelType.LLM)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "SFHuk3RErCvP" + }, + "outputs": [], + "source": [ + "from adalflow.components.model_client import GroqAPIClient\n", + "from adalflow.core.types import ModelType\n", + "from adalflow.utils import setup_env\n", + "from typing import List, Dict" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "cN4hsbLdrS7k" + }, + "outputs": [], + "source": [ + "class ChatConversation:\n", + " def __init__(self):\n", + " \"\"\"\n", + " Initialize a new ChatConversation object.\n", + " - GroqAPIClient is used to interact with the Groq model.\n", + " - conversation_history keeps track of the conversation between the user and assistant.\n", + " - model_kwargs contains the model parameters like 
temperature and max tokens.\n",
+ " \"\"\"\n",
+ " self.groq_client = (\n",
+ " GroqAPIClient()\n",
+ " ) # Initialize GroqAPIClient for model interaction\n",
+ " self.conversation_history: str = (\n",
+ " \"\" # Initialize conversation history as an empty string\n",
+ " )\n",
+ " self.model_kwargs = {\n",
+ " \"model\": \"llama3-8b-8192\", # Specify the model to use\n",
+ " \"temperature\": 0.5, # Set the temperature for response variability\n",
+ " \"max_tokens\": 100, # Limit the number of tokens in the response\n",
+ " }\n",
+ "\n",
+ " def add_user_message(self, message: str):\n",
+ " \"\"\"\n",
+ " Add a user message to the conversation history in the required format.\n",
+ " The message is wrapped with tags for better processing by the assistant.\n",
+ " \"\"\"\n",
+ " self.conversation_history += (\n",
+ " f\" {message} \" # Append user message to history\n",
+ " )\n",
+ "\n",
+ " def add_assistant_message(self, message: str):\n",
+ " \"\"\"\n",
+ " Add an assistant message to the conversation history in the required format.\n",
+ " The message is wrapped with tags for better processing.\n",
+ " \"\"\"\n",
+ " self.conversation_history += (\n",
+ " f\" {message} \" # Append assistant message to history\n",
+ " )\n",
+ "\n",
+ " def get_response(self) -> str:\n",
+ " \"\"\"\n",
+ " Generate a response from the assistant based on the conversation history.\n",
+ " - Converts the conversation history and model kwargs into the format required by the Groq API.\n",
+ " - Calls the API to get the response.\n",
+ " - Parses and adds the assistant's reply to the conversation history.\n",
+ " \"\"\"\n",
+ " # Prepare the request for the Groq API, converting the inputs into the correct format\n",
+ " api_kwargs = self.groq_client.convert_inputs_to_api_kwargs(\n",
+ " input=self.conversation_history, # Use the conversation history as input\n",
+ " model_kwargs=self.model_kwargs, # Include model-specific parameters\n",
+ " model_type=ModelType.LLM, # Specify the model type (Large Language Model)\n",
+ " )\n",
+ " print(f\"api_kwargs: {api_kwargs}\") # Log the API request parameters\n",
+ "\n",
+ " # Call the Groq model API to get the response\n",
+ " response = self.groq_client.call(\n",
+ " api_kwargs=api_kwargs,\n",
+ " model_type=ModelType.LLM, # Specify the model type again for clarity\n",
+ " )\n",
+ " print(\"response: \", response) # Log the API response\n",
+ "\n",
+ " # Parse the response to extract the assistant's reply\n",
+ " response_text = self.groq_client.parse_chat_completion(response)\n",
+ "\n",
+ " # Add the assistant's message to the conversation history.\n",
+ " # Note: parse_chat_completion returns a GeneratorOutput dataclass, so the\n",
+ " # object's repr (not just the reply text) is appended to the history, as the\n",
+ " # logged output below shows; appending response_text.raw_response instead\n",
+ " # would keep the history to plain text.\n",
+ " self.add_assistant_message(response_text)\n",
+ "\n",
+ " # Return the assistant's response text\n",
+ " return response_text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "id": "pvqsFTEsrV2M"
+ },
+ "outputs": [],
+ "source": [
+ "def check_chat_conversation():\n",
+ " \"\"\"\n",
+ " This function simulates a multi-turn conversation between a user and an assistant.\n",
+ " It demonstrates how user inputs are processed, and the assistant generates responses,\n",
+ " while maintaining the conversation history for each query.\n",
+ " \"\"\"\n",
+ " # Initialize the ChatConversation object\n",
+ " chat = ChatConversation() # This creates an instance of the ChatConversation class\n",
+ "\n",
+ " # Define a list of user questions for a multi-turn conversation\n",
+ " questions = [\n",
+ " \"What is the capital of France?\", # First user question\n",
+ " \"What is its population?\", # 
Second user question\n", + " \"Tell me about its famous landmarks\", # Third user question\n", + " ]\n", + "\n", + " # Loop through each question and get the assistant's response\n", + " for question in questions:\n", + " # Print the current question from the user\n", + " print(f\"\\nUser: {question}\")\n", + "\n", + " # Add the user's message to the conversation history\n", + " chat.add_user_message(question)\n", + "\n", + " # Get the assistant's response based on the conversation history\n", + " response = chat.get_response()\n", + "\n", + " # Print the assistant's response\n", + " print(f\"Assistant: {response}\")\n", + "\n", + " # After the conversation, print the full conversation history\n", + " print(\"\\nFull Conversation History:\")\n", + " print(\n", + " chat.conversation_history\n", + " ) # This will print all messages (user and assistant) in the conversation history" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dBNWQn_arXcE", + "outputId": "743e5d80-8a6b-4b0f-cff2-af11f0df051d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "User: What is the capital of France?\n", + "api_kwargs: {'model': 'llama3-8b-8192', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': ' What is the capital of France? '}]}\n", + "response: ChatCompletion(id='chatcmpl-c68fccb5-ed2b-4745-be81-acbac792387f', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', role='assistant', function_call=None, tool_calls=None))], created=1731305352, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=8, prompt_tokens=23, total_tokens=31, completion_time=0.006666667, prompt_time=0.003034232, queue_time=0.010475318, total_time=0.009700899), x_groq={'id': 'req_01jccxebfgf5qbnaea72y9atrm'})\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=23, total_tokens=31), raw_response='The capital of France is Paris.', metadata=None)\n", + "\n", + "User: What is its population?\n", + "api_kwargs: {'model': 'llama3-8b-8192', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': \" What is the capital of France? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=23, total_tokens=31), raw_response='The capital of France is Paris.', metadata=None) What is its population? \"}]}\n", + "response: ChatCompletion(id='chatcmpl-e6ff7c1e-437c-49d9-bef7-5c6834d3e169', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The population of Paris, the capital of France, is approximately 2.1 million people within its city limits. 
However, the metropolitan area of Paris, which includes the surrounding suburbs, has a population of over 12.2 million people, making it one of the most populous metropolitan areas in Europe.', role='assistant', function_call=None, tool_calls=None))], created=1731305352, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=62, prompt_tokens=85, total_tokens=147, completion_time=0.051666667, prompt_time=0.003680399, queue_time=0.009721731, total_time=0.055347066), x_groq={'id': 'req_01jccxebk7ejstbdxzerdj643q'})\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=62, prompt_tokens=85, total_tokens=147), raw_response='The population of Paris, the capital of France, is approximately 2.1 million people within its city limits. However, the metropolitan area of Paris, which includes the surrounding suburbs, has a population of over 12.2 million people, making it one of the most populous metropolitan areas in Europe.', metadata=None)\n", + "\n", + "User: Tell me about its famous landmarks\n", + "api_kwargs: {'model': 'llama3-8b-8192', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': \" What is the capital of France? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=23, total_tokens=31), raw_response='The capital of France is Paris.', metadata=None) What is its population? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=62, prompt_tokens=85, total_tokens=147), raw_response='The population of Paris, the capital of France, is approximately 2.1 million people within its city limits. However, the metropolitan area of Paris, which includes the surrounding suburbs, has a population of over 12.2 million people, making it one of the most populous metropolitan areas in Europe.', metadata=None) Tell me about its famous landmarks \"}]}\n", + "response: ChatCompletion(id='chatcmpl-6d202bb8-d1fc-471e-a7cd-9dd63fe4f9b8', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content=\"GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=50, prompt_tokens=74, total_tokens=124), raw_response='Paris, the capital of France, is famous for its stunning architecture, art museums, and iconic landmarks. Some of the most famous landmarks in Paris include:\\n\\n* The Eiffel Tower: Built for the 1889 World\\\\'s Fair, the Eiffel Tower is an iron lattice tower that stands 324 meters tall and is\", role='assistant', function_call=None, tool_calls=None))], created=1731305352, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=100, prompt_tokens=202, total_tokens=302, completion_time=0.083333333, prompt_time=0.008920166, queue_time=0.006389374, total_time=0.092253499), x_groq={'id': 'req_01jccxebrfemjb5ag1a66d6jxc'})\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=202, total_tokens=302), raw_response=\"GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=50, prompt_tokens=74, total_tokens=124), raw_response='Paris, the capital of France, is famous for its stunning architecture, art museums, and iconic landmarks. 
Some of the most famous landmarks in Paris include:\\n\\n* The Eiffel Tower: Built for the 1889 World\\\\'s Fair, the Eiffel Tower is an iron lattice tower that stands 324 meters tall and is\", metadata=None)\n", + "\n", + "Full Conversation History:\n", + " What is the capital of France? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=23, total_tokens=31), raw_response='The capital of France is Paris.', metadata=None) What is its population? GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=62, prompt_tokens=85, total_tokens=147), raw_response='The population of Paris, the capital of France, is approximately 2.1 million people within its city limits. However, the metropolitan area of Paris, which includes the surrounding suburbs, has a population of over 12.2 million people, making it one of the most populous metropolitan areas in Europe.', metadata=None) Tell me about its famous landmarks GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=202, total_tokens=302), raw_response=\"GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=50, prompt_tokens=74, total_tokens=124), raw_response='Paris, the capital of France, is famous for its stunning architecture, art museums, and iconic landmarks. Some of the most famous landmarks in Paris include:\\n\\n* The Eiffel Tower: Built for the 1889 World\\\\'s Fair, the Eiffel Tower is an iron lattice tower that stands 324 meters tall and is\", metadata=None) \n" + ] + } + ], + "source": [ + "check_chat_conversation()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EhF6taMXniS7" + }, + "source": [ + "### Adalflow - model_client() - **Groq model** LLM Multichat Usage (ModelType.LLM) - asynchronous (async())" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "6pqSxmL_s11g" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "from adalflow.components.model_client import GroqAPIClient\n", + "from adalflow.core.types import ModelType\n", + "from typing import List" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "40LRTSyOr884" + }, + "outputs": [], + "source": [ + "class ChatConversation:\n", + " def __init__(self):\n", + " # Using an asynchronous client for communication with GroqAPI\n", + " self.groq_client = GroqAPIClient() # Create an instance of GroqAPIClient\n", + " # Model configuration parameters (e.g., Llama model with 8b parameters and 8192 context length)\n", + " self.model_kwargs = {\n", + " \"model\": \"llama3-8b-8192\", # Llama model with specific size\n", + " \"temperature\": 0.5, # Degree of randomness in the model's responses\n", + " \"max_tokens\": 100, # Maximum number of tokens in the response\n", + " }\n", + "\n", + " async def get_response(self, message: str) -> str:\n", + " \"\"\"Get response from the model for a single message asynchronously\"\"\"\n", + "\n", + " # Convert the user input message to the appropriate format for the Groq API\n", + " api_kwargs = self.groq_client.convert_inputs_to_api_kwargs(\n", + " input=message, # User's input message\n", + " model_kwargs=self.model_kwargs, # Model parameters\n", + " model_type=ModelType.LLM, # Model type for large language models (LLM)\n", + " )\n", + " print(f\"api_kwargs: {api_kwargs}\") # Print the API arguments for debugging\n", + "\n", + " # Asynchronously call the Groq API with the provided API 
arguments\n", + " response = await self.groq_client.acall(\n", + " api_kwargs=api_kwargs, # Pass the API arguments\n", + " model_type=ModelType.LLM, # Specify the model type\n", + " )\n", + " print(\"response: \", response) # Print the API response for debugging\n", + "\n", + " # Parse the response to extract the assistant's reply from the API response\n", + " response_text = self.groq_client.parse_chat_completion(response)\n", + " return response_text # Return the assistant's response text" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "Y-n1ksBSsC-J" + }, + "outputs": [], + "source": [ + "async def check_chat_conversations():\n", + " # Create an instance of ChatConversation\n", + " chat = ChatConversation()\n", + "\n", + " # List of unrelated questions for independent async calls\n", + " questions = [\n", + " \"What is the capital of France?\",\n", + " \"Is dog a wild animal ?\",\n", + " \"Tell me about amazon forest\",\n", + " ]\n", + "\n", + " # Run each question as an independent asynchronous task\n", + " tasks = [chat.get_response(question) for question in questions]\n", + " # Gather all the responses concurrently\n", + " responses = await asyncio.gather(*tasks)\n", + "\n", + " # Display each response alongside the question\n", + " for question, response in zip(questions, responses):\n", + " print(f\"\\nUser: {question}\")\n", + " print(f\"Assistant: {response}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kvqOTUknsKMI", + "outputId": "df47682f-db10-4439-98fc-7cd0c8486776" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "api_kwargs: {'model': 'llama3-8b-8192', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'What is the capital of France?'}]}\n", + "api_kwargs: {'model': 'llama3-8b-8192', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'Is dog a wild animal ?'}]}\n", + "api_kwargs: {'model': 'llama3-8b-8192', 'temperature': 0.5, 'max_tokens': 100, 'messages': [{'role': 'system', 'content': 'Tell me about amazon forest'}]}\n", + "response: ChatCompletion(id='chatcmpl-d2fb086a-5d23-409e-b060-4c00578611fe', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', role='assistant', function_call=None, tool_calls=None))], created=1731305379, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=8, prompt_tokens=17, total_tokens=25, completion_time=0.006666667, prompt_time=0.003519913, queue_time=0.010127806000000001, total_time=0.01018658), x_groq={'id': 'req_01jccxf5szf5sas99m0xhrz2g8'})\n", + "response: ChatCompletion(id='chatcmpl-37af21d1-dd36-4ee4-a4f3-6cce914b25dd', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='The answer to this question is a bit nuanced.\\n\\nDomesticated dogs (Canis lupus familiaris) are not considered wild animals in the classical sense. They have been selectively bred by humans for thousands of years, which has led to significant changes in their behavior, physiology, and genetics. 
As a result, domesticated dogs have adapted to living alongside humans and have lost many of the characteristics that define wild animals.\\n\\nHowever, there are some feral dog populations that have descended from domesticated dogs', role='assistant', function_call=None, tool_calls=None))], created=1731305379, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=100, prompt_tokens=16, total_tokens=116, completion_time=0.083333333, prompt_time=0.005273133, queue_time=0.007805676, total_time=0.088606466), x_groq={'id': 'req_01jccxf5t0epbv6dxgj28hvjpt'})\n", + "response: ChatCompletion(id='chatcmpl-85a584e8-5647-4112-84ec-bc770f16b091', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='The Amazon rainforest, also known as Amazonia, is the largest tropical rainforest in the world, covering an area of over 5.5 million square kilometers (2.1 million square miles) across nine countries in South America, including Brazil, Peru, Colombia, Venezuela, Ecuador, Bolivia, Guyana, Suriname, and French Guiana.\\n\\nHere are some fascinating facts about the Amazon rainforest:\\n\\n1. Biodiversity hotspots: The Amazon rainforest is home to an estimated', role='assistant', function_call=None, tool_calls=None))], created=1731305379, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=100, prompt_tokens=15, total_tokens=115, completion_time=0.086005899, prompt_time=0.000504017, queue_time=0.014784051999999999, total_time=0.086509916), x_groq={'id': 'req_01jccxf5ste18rkg69qqmfrjnk'})\n", + "\n", + "User: What is the capital of France?\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=17, total_tokens=25), raw_response='The capital of France is Paris.', metadata=None)\n", + "\n", + "User: Is dog a wild animal ?\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=16, total_tokens=116), raw_response='The answer to this question is a bit nuanced.\\n\\nDomesticated dogs (Canis lupus familiaris) are not considered wild animals in the classical sense. They have been selectively bred by humans for thousands of years, which has led to significant changes in their behavior, physiology, and genetics. As a result, domesticated dogs have adapted to living alongside humans and have lost many of the characteristics that define wild animals.\\n\\nHowever, there are some feral dog populations that have descended from domesticated dogs', metadata=None)\n", + "\n", + "User: Tell me about amazon forest\n", + "Assistant: GeneratorOutput(id=None, data=None, error=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=15, total_tokens=115), raw_response='The Amazon rainforest, also known as Amazonia, is the largest tropical rainforest in the world, covering an area of over 5.5 million square kilometers (2.1 million square miles) across nine countries in South America, including Brazil, Peru, Colombia, Venezuela, Ecuador, Bolivia, Guyana, Suriname, and French Guiana.\\n\\nHere are some fascinating facts about the Amazon rainforest:\\n\\n1. 
Biodiversity hotspots: The Amazon rainforest is home to an estimated', metadata=None)\n" + ] + } + ], + "source": [ + "await check_chat_conversations()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wHO2_4L7sOnL" + }, + "source": [ + "### Adalflow - model_client() - **Groq model** LLM Multichat Usage (ModelType.LLM) - Benchmark sync() vs async()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "id": "4yGh8iy8sON1" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "import time\n", + "from adalflow.components.model_client import (\n", + " GroqAPIClient,\n", + ") # Assuming GroqAPI with .call() and .acall() is available\n", + "from adalflow.core.types import ModelType" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "id": "DkMkbt7DtDQv" + }, + "outputs": [], + "source": [ + "# Initialize the Groq client\n", + "groq_client = GroqAPIClient()\n", + "\n", + "# Sample prompt for testing\n", + "prompt = \"Tell me a joke.\"\n", + "\n", + "model_kwargs = {\"model\": \"llama3-8b-8192\", \"temperature\": 0.5, \"max_tokens\": 100}" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "id": "IUEJ6KM7tDQw" + }, + "outputs": [], + "source": [ + "# Synchronous function for benchmarking .call()\n", + "def benchmark_sync_call(api_kwargs, runs=10):\n", + " # List to store responses from each synchronous call\n", + " responses = []\n", + "\n", + " # Record the start time for benchmarking\n", + " start_time = time.time()\n", + "\n", + " # Perform synchronous API calls in a loop\n", + " responses = [\n", + " groq_client.call( # Calling the API synchronously\n", + " api_kwargs=api_kwargs, # Passing the API arguments\n", + " model_type=ModelType.LLM, # Defining the model type\n", + " )\n", + " for _ in range(runs) # Repeat the call 'runs' times\n", + " ]\n", + "\n", + " # Record the end time after all calls are completed\n", + " end_time = time.time()\n", + "\n", + " # Print out the response from each synchronous call\n", + " for i, response in enumerate(responses):\n", + " print(f\"sync call {i + 1} completed: {response}\")\n", + "\n", + " # Print the total time taken for the synchronous benchmark\n", + " print(f\"\\nSynchronous benchmark completed in {end_time - start_time:.2f} seconds\")\n", + "\n", + "\n", + "# Asynchronous function for benchmarking .acall()\n", + "async def benchmark_async_acall(api_kwargs, runs=10):\n", + " # Record the start time for benchmarking\n", + " start_time = time.time()\n", + "\n", + " # Create a list of tasks for asynchronous API calls\n", + " tasks = [\n", + " groq_client.acall( # Calling the API asynchronously\n", + " api_kwargs=api_kwargs, # Passing the API arguments\n", + " model_type=ModelType.LLM, # Defining the model type\n", + " )\n", + " for _ in range(runs) # Repeat the call 'runs' times\n", + " ]\n", + "\n", + " # Await the completion of all tasks concurrently\n", + " responses = await asyncio.gather(\n", + " *tasks\n", + " ) # Gather all the responses from asynchronous calls\n", + "\n", + " # Record the end time after all asynchronous calls are completed\n", + " end_time = time.time()\n", + "\n", + " # Print out the response from each asynchronous call\n", + " for i, response in enumerate(responses):\n", + " print(f\"Async call {i + 1} completed: {response}\")\n", + "\n", + " # Print the total time taken for the asynchronous benchmark\n", + " print(f\"\\nAsynchronous benchmark completed in {end_time - start_time:.2f} seconds\")" + ] + }, + { + 
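"cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The cell below converts the prompt into `api_kwargs` once and reuses the same dict for both benchmarks. The synchronous version issues the ten requests one after another, while the asynchronous version schedules all ten with `asyncio.gather` so their network round-trips overlap, which is why the async timing printed below is several times smaller. A minimal sketch of the two patterns, assuming only the `.call()`/`.acall()` client interface used above:\n",
+ "\n",
+ "```python\n",
+ "# sequential: total time is roughly the sum of the per-request latencies\n",
+ "responses = [groq_client.call(api_kwargs=api_kwargs, model_type=ModelType.LLM) for _ in range(10)]\n",
+ "\n",
+ "# concurrent: total time is roughly the latency of the slowest single request\n",
+ "responses = await asyncio.gather(\n",
+ "    *[groq_client.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM) for _ in range(10)]\n",
+ ")\n",
+ "```"
+ ]
+ },
+ {
+ 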
"cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "idOjFAo8tDQw", + "outputId": "cb790957-8960-4e58-a7de-39dfd0dd3504" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting synchronous benchmark...\n", + "\n", + "sync call 1 completed: ChatCompletion(id='chatcmpl-a6bc4231-b712-4014-a87d-0e9368f5d8f4', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.000141559, queue_time=0.01454033, total_time=0.015141559), x_groq={'id': 'req_01jccxfkx7epcsynkkex05e6v6'})\n", + "sync call 2 completed: ChatCompletion(id='chatcmpl-00586f1c-f6fb-4650-a549-ff24d462c6bf', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.000141569, queue_time=0.013657111000000001, total_time=0.015141569), x_groq={'id': 'req_01jccxfm15fs0vyr85remr47wm'})\n", + "sync call 3 completed: ChatCompletion(id='chatcmpl-a5fe8868-ca01-445e-89ba-d5791da524fa', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.000138579, queue_time=0.014364931000000001, total_time=0.015138579), x_groq={'id': 'req_01jccxfm4ye4z89hff0f8d0yas'})\n", + "sync call 4 completed: ChatCompletion(id='chatcmpl-7ae04f5f-79c0-49b4-9f08-decc05393809', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002182427, queue_time=0.011133002, total_time=0.017182427), x_groq={'id': 'req_01jccxfm8wf4pacws56qqkbcrg'})\n", + "sync call 5 completed: ChatCompletion(id='chatcmpl-4023328d-0e1b-4127-b124-06b1d2ec4c86', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, 
prompt_time=0.000136529, queue_time=0.013371651, total_time=0.015136529), x_groq={'id': 'req_01jccxfmcpfs0twwzvvvrf8g5s'})\n", + "sync call 6 completed: ChatCompletion(id='chatcmpl-9713209a-bbad-491b-8eec-7f9ba3faf0c0', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002243946, queue_time=0.011401844, total_time=0.017243946), x_groq={'id': 'req_01jccxfmgcf85vtdzmt7mwfk8x'})\n", + "sync call 7 completed: ChatCompletion(id='chatcmpl-1bf326d8-68f8-4117-801e-4146d0085114', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Here's one:\\n\\nWhy couldn't the bicycle stand up by itself?\\n\\n(Wait for it...)\\n\\nBecause it was two-tired!\\n\\nHope that made you laugh!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=33, prompt_tokens=15, total_tokens=48, completion_time=0.0275, prompt_time=0.002932829, queue_time=0.011706590000000001, total_time=0.030432829), x_groq={'id': 'req_01jccxfmm7e4ztjp9fn0kkbjx0'})\n", + "sync call 8 completed: ChatCompletion(id='chatcmpl-1d5ecb3b-c923-4c36-a89b-ad086ee677e6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305394, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002190067, queue_time=0.011227092999999999, total_time=0.017190067), x_groq={'id': 'req_01jccxfmrgfdpbjbb07248341m'})\n", + "sync call 9 completed: ChatCompletion(id='chatcmpl-d5f1ff90-9100-472b-aad0-2e18e67a1871', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.000136839, queue_time=0.014356821, total_time=0.015136839), x_groq={'id': 'req_01jccxfmw9f4p9qvktvtp0g557'})\n", + "sync call 10 completed: ChatCompletion(id='chatcmpl-c19f72d7-a2c4-48e3-848f-bef6a514a842', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002243396, queue_time=0.011192144, total_time=0.017243396), x_groq={'id': 'req_01jccxfn06f85td634z5vyhzrt'})\n", + 
"\n", + "Synchronous benchmark completed in 1.42 seconds\n", + "\n", + "Starting asynchronous benchmark...\n", + "\n", + "Async call 1 completed: ChatCompletion(id='chatcmpl-06c89067-a76f-484a-87ba-159f6b36564a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002921139, queue_time=0.015738821, total_time=0.017921139), x_groq={'id': 'req_01jccxfn9cejvbpr29s0k0nkhr'})\n", + "Async call 2 completed: ChatCompletion(id='chatcmpl-2a5e8ccf-8058-4a77-a60a-5f7b86c71fb9', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.004858751, queue_time=0.010167037, total_time=0.019858751), x_groq={'id': 'req_01jccxfn9dfrxvvexkv623ezng'})\n", + "Async call 3 completed: ChatCompletion(id='chatcmpl-54d3f2e8-5603-4d2f-8396-b72a2716da2a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002244876, queue_time=0.012254712000000001, total_time=0.017244876), x_groq={'id': 'req_01jccxfn8ye8tasgfq5hzjrzyd'})\n", + "Async call 4 completed: ChatCompletion(id='chatcmpl-a4fd586f-1ec8-423b-af69-b0300b940d11', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.002188018, queue_time=0.01165656, total_time=0.017188018), x_groq={'id': 'req_01jccxfn96e0b9swhyd96cs7mg'})\n", + "Async call 5 completed: ChatCompletion(id='chatcmpl-4d63c669-7242-4f31-be2d-b31eb0870245', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Here's one:\\n\\nWhy couldn't the bicycle stand up by itself?\\n\\n(wait for it...)\\n\\nBecause it was two-tired!\\n\\nHope that made you laugh!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=32, prompt_tokens=15, total_tokens=47, completion_time=0.026666667, prompt_time=0.002829583, queue_time=0.011314187, total_time=0.02949625), x_groq={'id': 'req_01jccxfn9ee4zrdjw0n9jktjkt'})\n", + "Async 
call 6 completed: ChatCompletion(id='chatcmpl-5c30e90c-135b-49dc-8f8e-966fdb391dc7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_179b0f92c9', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.000155179, queue_time=0.014245601, total_time=0.015155179), x_groq={'id': 'req_01jccxfna4f5vv7b66gyk9zwam'})\n", + "Async call 7 completed: ChatCompletion(id='chatcmpl-eda5d2dc-82e3-40ca-a544-c770726bc8d0', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.004997677, queue_time=0.008474321, total_time=0.019997677), x_groq={'id': 'req_01jccxfn9ff8ar78qnbtnqryec'})\n", + "Async call 8 completed: ChatCompletion(id='chatcmpl-1132c5ca-1ba2-49ae-94ee-359c3049d4d1', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.014361024, queue_time=0.0029951239999999983, total_time=0.029361024), x_groq={'id': 'req_01jccxfndfe1b8hre70xfj9cde'})\n", + "Async call 9 completed: ChatCompletion(id='chatcmpl-44ea61fb-a1a8-4b70-a5b0-96d793041a48', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_a97cfe35ae', usage=CompletionUsage(completion_tokens=18, prompt_tokens=15, total_tokens=33, completion_time=0.015, prompt_time=0.004858171, queue_time=0.010396207000000001, total_time=0.019858171), x_groq={'id': 'req_01jccxfn9gfh49k150pw1gsysz'})\n", + "Async call 10 completed: ChatCompletion(id='chatcmpl-2ac98624-8d3f-41f8-abef-5f8b5aebf7ab', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Here's one:\\n\\nWhy couldn't the bicycle stand up by itself?\\n\\n(Wait for it...)\\n\\nBecause it was two-tired!\\n\\nHope that made you laugh!\", role='assistant', function_call=None, tool_calls=None))], created=1731305395, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_6a6771ae9c', usage=CompletionUsage(completion_tokens=33, prompt_tokens=15, total_tokens=48, completion_time=0.0275, prompt_time=0.002554123, queue_time=0.010962996, total_time=0.030054123), x_groq={'id': 'req_01jccxfn9de0b97f7wj7kvsznw'})\n", + "\n", + "Asynchronous benchmark completed in 0.44 seconds\n" + ] + } + ], + "source": [ + "api_kwargs = 
groq_client.convert_inputs_to_api_kwargs(\n",
+ " input=prompt, model_kwargs=model_kwargs, model_type=ModelType.LLM\n",
+ ")\n",
+ "\n",
+ "# Run both benchmarks\n",
+ "print(\"Starting synchronous benchmark...\\n\")\n",
+ "benchmark_sync_call(api_kwargs)\n",
+ "\n",
+ "print(\"\\nStarting asynchronous benchmark...\\n\")\n",
+ "await benchmark_async_acall(api_kwargs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bcfOfW5wteYr"
+ },
+ "source": [
+ "### Adalflow - model_client() - **Custom Model** client building (ModelType.LLM) and (ModelType.EMBEDDER) - Synchronous\n",
+ "Note: I am using the OpenAI API as an example of how to build a custom model client in AdalFlow. Even though such a client already exists in the AdalFlow repo, the code below is a good starting point for anyone who wants to build a custom model client."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {
+ "id": "kOeBbL31tmLz"
+ },
+ "outputs": [],
+ "source": [
+ "# Building a simple custom third-party model client and using it\n",
+ "# I have modified convert_inputs_to_api_kwargs() to make sure it follows the prompt format of the OpenAI API,\n",
+ "# and I have used the appropriate OpenAI API call in call()\n",
+ "\n",
+ "import openai\n",
+ "from adalflow.core.model_client import ModelClient\n",
+ "from adalflow.core.types import ModelType, GeneratorOutput, EmbedderOutput\n",
+ "from openai.types import (\n",
+ " CreateEmbeddingResponse,\n",
+ ")\n",
+ "from adalflow.components.model_client.utils import parse_embedding_response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {
+ "id": "7GhkATzXuBdQ"
+ },
+ "outputs": [],
+ "source": [
+ "class SimpleCustomModelClient(ModelClient):\n",
+ " # Initialize the custom model client\n",
+ " def __init__(self):\n",
+ " # Call the parent class's initializer\n",
+ " super().__init__()\n",
+ " pass # Placeholder for any initialization logic if needed in the future\n",
+ "\n",
+ " # Method to convert input into API parameters for different model types (LLM or Embedder)\n",
+ " def convert_inputs_to_api_kwargs(\n",
+ " self, input=None, model_kwargs={}, model_type=ModelType.UNDEFINED\n",
+ " ):\n",
+ " \"\"\"\n",
+ " Convert the inputs into API arguments based on the model type.\n",
+ "\n",
+ " Args:\n",
+ " input (str): The input text to be processed.\n",
+ " model_kwargs (dict): Additional model parameters like temperature, max_tokens, etc.\n",
+ " model_type (ModelType): The type of model to use (LLM or Embedder).\n",
+ "\n",
+ " Returns:\n",
+ " dict: API arguments formatted for the specified model type.\n",
+ " \"\"\"\n",
+ " if (\n",
+ " model_type == ModelType.LLM\n",
+ " ): # If the model type is a large language model (LLM)\n",
+ " return {\n",
+ " \"model\": model_kwargs[\n",
+ " \"model\"\n",
+ " ], # Set the model to use (e.g., GPT-3, GPT-4)\n",
+ " \"messages\": input, # Provide the input as the message\n",
+ " \"temperature\": model_kwargs[\n",
+ " \"temperature\"\n",
+ " ], # Set the temperature (creativity of the response)\n",
+ " \"max_tokens\": model_kwargs[\n",
+ " \"max_tokens\"\n",
+ " ], # Max tokens to generate in the response\n",
+ " }\n",
+ " elif model_type == ModelType.EMBEDDER: # If the model type is an embedder\n",
+ " return {\n",
+ " \"model\": model_kwargs[\"model\"], # Model name for embedding\n",
+ " \"input\": [input], # Provide the input in a list format for embedding\n",
+ " }\n",
+ " else:\n",
+ " # Raise an error if the model type is unsupported\n",
+ " raise ValueError(f\"model_type {model_type} is not supported\")\n",
+ 
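" # Example (hypothetical values) of the api_kwargs this method returns for ModelType.LLM:\n",
+ " # {\"model\": \"gpt-3.5-turbo\", \"messages\": [...], \"temperature\": 0.5, \"max_tokens\": 100}\n",
+ " # The dict is unpacked directly into the OpenAI SDK calls in call() below.\n",
+ 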
"\n", + " # Method to make the actual API call to OpenAI for either completions (LLM) or embeddings\n", + " def call(self, api_kwargs={}, model_type=ModelType.UNDEFINED):\n", + " \"\"\"\n", + " Call the appropriate OpenAI API method based on the model type (LLM or Embedder).\n", + "\n", + " Args:\n", + " api_kwargs (dict): Arguments to be passed to the API call.\n", + " model_type (ModelType): The type of model (LLM or Embedder).\n", + "\n", + " Returns:\n", + " Response: The API response from OpenAI.\n", + " \"\"\"\n", + " if model_type == ModelType.LLM: # If the model type is LLM (e.g., GPT-3, GPT-4)\n", + " return openai.chat.completions.create(\n", + " **api_kwargs\n", + " ) # Call the chat API for completion\n", + " elif model_type == ModelType.EMBEDDER: # If the model type is Embedder\n", + " return openai.embeddings.create(**api_kwargs) # Call the embedding API\n", + " else:\n", + " # Raise an error if an invalid model type is passed\n", + " raise ValueError(f\"Unsupported model type: {model_type}\")\n", + "\n", + " # Method to parse the response from a chat completion API call\n", + " def parse_chat_completion(self, completion):\n", + " \"\"\"\n", + " Parse the response from a chat completion API call into a custom output format.\n", + "\n", + " Args:\n", + " completion: The completion response from the OpenAI API.\n", + "\n", + " Returns:\n", + " GeneratorOutput: A custom data structure containing the parsed response.\n", + " \"\"\"\n", + " # Note: GeneratorOutput is a adalflow dataclass that contains the parsed completion data\n", + " return GeneratorOutput(\n", + " data=completion, # Store the raw completion data\n", + " error=None, # No error in this case\n", + " raw_response=str(completion), # Store the raw response as a string\n", + " )\n", + "\n", + " # Method to parse the response from an embedding API call\n", + " def parse_embedding_response(\n", + " self, response: CreateEmbeddingResponse\n", + " ) -> EmbedderOutput:\n", + " \"\"\"\n", + " Parse the response from an embedding API call into a custom output format.\n", + "\n", + " Args:\n", + " response (CreateEmbeddingResponse): The response from the embedding API.\n", + "\n", + " Returns:\n", + " EmbedderOutput: A custom data structure containing the parsed embedding response.\n", + " \"\"\"\n", + " try:\n", + " # Attempt to parse the embedding response using a helper function\n", + " return parse_embedding_response(response)\n", + " except Exception as e:\n", + " # If parsing fails, return an error message with the raw response\n", + " return EmbedderOutput(data=[], error=str(e), raw_response=response)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "id": "W0p7jVaeuE66" + }, + "outputs": [], + "source": [ + "def build_custom_model_client():\n", + " # Instantiate the custom model client (SimpleCustomModelClient)\n", + " custom_client = SimpleCustomModelClient()\n", + "\n", + " # Define the query for the model to process\n", + " query = \"What is the capital of France?\"\n", + "\n", + " # Set the model type for a Large Language Model (LLM)\n", + " model_type = ModelType.LLM\n", + "\n", + " # Prepare the message prompt as expected by the OpenAI chat API.\n", + " # This format is suitable for GPT-like models (e.g., gpt-3.5-turbo).\n", + " message_prompt = [\n", + " {\n", + " \"role\": \"user\", # Define the user role in the conversation\n", + " \"content\": [\n", + " {\n", + " \"type\": \"text\", # Specify that the input is a text type\n", + " \"text\": query, # The actual query to be 
processed by the model\n", + " }\n", + " ],\n", + " }\n", + " ]\n", + "\n", + " # Print message indicating the usage of the LLM model type\n", + " print(\"ModelType LLM\")\n", + "\n", + " # Define additional model parameters like model name, temperature, and max tokens for LLM\n", + " model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.5, \"max_tokens\": 100}\n", + "\n", + " # Convert the input message and model kwargs into the required API parameters\n", + " api_kwargs = custom_client.convert_inputs_to_api_kwargs(\n", + " input=message_prompt, model_kwargs=model_kwargs, model_type=model_type\n", + " )\n", + "\n", + " # Print the API arguments that will be passed to the call method\n", + " print(f\"api_kwargs: {api_kwargs}\")\n", + "\n", + " # Call the LLM model using the prepared API arguments\n", + " result = custom_client.call(api_kwargs, ModelType.LLM)\n", + "\n", + " # Print the result of the LLM model call (response from OpenAI)\n", + " print(result)\n", + "\n", + " # Parse the chat completion response and output a more structured result\n", + " response_text = custom_client.parse_chat_completion(result)\n", + "\n", + " # Print the structured response from the chat completion\n", + " print(f\"response_text: {response_text}\")\n", + "\n", + " # Switch to using the Embedder model type\n", + " print(\"ModelType EMBEDDER\")\n", + "\n", + " # Define model-specific parameters for the embedding model\n", + " model_kwargs = {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 8,\n", + " \"encoding_format\": \"float\",\n", + " }\n", + "\n", + " # Convert the input query for the embedder model\n", + " api_kwargs = custom_client.convert_inputs_to_api_kwargs(\n", + " input=query, model_kwargs=model_kwargs, model_type=ModelType.EMBEDDER\n", + " )\n", + "\n", + " # Print the API arguments that will be passed to the embedder model\n", + " print(f\"embedder api_kwargs: {api_kwargs}\")\n", + "\n", + " # Call the Embedder model using the prepared API arguments\n", + " result = custom_client.call(api_kwargs, ModelType.EMBEDDER)\n", + "\n", + " # Print the result of the Embedder model call (embedding response)\n", + " print(result)\n", + "\n", + " # Parse the embedding response and output a more structured result\n", + " response_text = custom_client.parse_embedding_response(result)\n", + "\n", + " # Print the structured response from the embedding model\n", + " print(f\"response_text: {response_text}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RkVCvbWruKs4", + "outputId": "ffa02fa3-7570-4bf1-9880-0288d358f815" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ModelType LLM\n", + "api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What is the capital of France?'}]}], 'temperature': 0.5, 'max_tokens': 100}\n", + "ChatCompletion(id='chatcmpl-ASHw0PEDqdMlIAIZwr8w2t4L3C9u2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305488, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=14, total_tokens=21, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, 
accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\n", + "response_text: GeneratorOutput(id=None, data=ChatCompletion(id='chatcmpl-ASHw0PEDqdMlIAIZwr8w2t4L3C9u2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305488, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=14, total_tokens=21, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0))), error=None, usage=None, raw_response=\"ChatCompletion(id='chatcmpl-ASHw0PEDqdMlIAIZwr8w2t4L3C9u2', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of France is Paris.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1731305488, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=14, total_tokens=21, completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0, accepted_prediction_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))\", metadata=None)\n", + "ModelType EMBEDDER\n", + "embedder api_kwargs: {'model': 'text-embedding-3-small', 'input': ['What is the capital of France?']}\n", + "CreateEmbeddingResponse(data=[Embedding(embedding=[0.04169800877571106, 0.0158005952835083, 0.028160491958260536, 0.024351144209504128, -0.023142803460359573, -0.002739247865974903, -0.014223608188331127, 0.01433624979108572, 0.010834109038114548, -0.010199218057096004, 0.006942841224372387, -0.024043940007686615, -0.06164587661623955, -0.01508378330618143, -0.014233848080039024, 0.023163283243775368, -0.006625395733863115, 0.019446099177002907, 0.07241854071617126, -0.024392105638980865, 0.003002932295203209, -0.010091695934534073, -0.04100167378783226, 0.011970768682658672, 0.06209644302725792, 0.0070964437909424305, -0.04554831609129906, -0.007347328122705221, 0.00364038348197937, 0.03942468762397766, 0.04214857518672943, -0.0251498781144619, -0.0019558740314096212, 0.04309067130088806, -0.024535467848181725, -0.03995717689394951, -0.03764289617538452, -0.039342764765024185, 0.021320052444934845, 0.029676036909222603, -0.003136054612696171, -0.01302550733089447, 0.00684555945917964, 0.013230310752987862, -0.027320796623826027, -0.030679574236273766, -0.009221280924975872, -0.039936695247888565, -0.03360826522111893, 0.02748463861644268, 0.03883075714111328, 0.004044870380312204, 0.03252280876040459, 0.03262520954012871, -0.016814373433589935, 0.004218953661620617, 0.024678830057382584, 0.009641128592193127, 0.04665425419807434, 0.015544591471552849, 0.036127351224422455, -0.010265778750181198, 0.026358218863606453, 0.0043085552752017975, 0.0005580897559411824, 0.0354514978826046, -0.0039322287775576115, 0.03788866102695465, 0.05906534940004349, 0.04612176492810249, -0.011059393174946308, 0.016312604770064354, -0.00918543990701437, 0.004631120711565018, -0.006594675127416849, -0.018145596608519554, -0.003968069329857826, 
-0.0059649040922522545, -0.03207223862409592, -0.031867437064647675, -0.036168310791254044, 0.0010604985291138291, -0.01807391457259655, -0.008606869727373123, 0.008248464204370975, -0.044647179543972015, 0.026767827570438385, 0.03383354842662811, -0.022917520254850388, -0.04767827317118645, 0.0033997392747551203, 0.011141314171254635, -0.025928132236003876, 0.027115993201732635, -0.010388661175966263, 0.01921057514846325, 0.03549245744943619, 0.0011750605190172791, -0.06819958984851837, 0.000605450535658747, 0.019323216751217842, -0.023982498794794083, -0.031109662726521492, 0.026972630992531776, 0.02560044638812542, 0.040182460099458694, 0.015862036496400833, -0.004974166862666607, 0.003153975121676922, -0.03852355107665062, -0.025661887601017952, 0.011212995275855064, 0.0033536585979163647, 0.02431018464267254, -0.04812883958220482, -0.029102588072419167, -0.023859616369009018, -0.02416682057082653, 0.02902066521346569, -0.02574380859732628, 0.033157698810100555, 0.052511636167764664, -0.04718674346804619, 0.010337459854781628, 0.010752187110483646, -0.013424874283373356, -0.0027725284453481436, -0.002777648391202092, 0.03491900861263275, -0.03870787471532822, 0.01074194721877575, -0.02752560004591942, 0.024535467848181725, 0.033055298030376434, 0.031232545152306557, 0.01897505111992359, 0.026952149346470833, -0.016937255859375, -0.018544962629675865, 0.010782907716929913, 0.007931018248200417, 0.013189350254833698, 0.021668218076229095, 0.003315257839858532, -0.02668590471148491, -0.01458201464265585, -0.04143176227807999, 0.040530625730752945, 0.01154068112373352, -0.042312417179346085, 0.040428224951028824, -0.02312232367694378, -0.0038989479653537273, 0.01604636013507843, -0.0056525785475969315, -0.036721281707286835, -0.008970396593213081, 0.019824985414743423, 0.0059649040922522545, 0.04341835901141167, -0.03878979757428169, 0.04927574098110199, -0.03719232976436615, -0.006026345305144787, 0.012257494032382965, 0.03287097439169884, -0.03643455356359482, -0.02140197344124317, 0.00695820152759552, -0.005381213966757059, -0.02461738884449005, 0.004137032199651003, 0.054354868829250336, 0.021156208589673042, 0.03006516396999359, -0.024392105638980865, -0.04943958297371864, 0.0406944714486599, 0.0003852867230307311, -0.01936417818069458, -0.028344813734292984, -0.02803760953247547, 0.011735244654119015, 0.013045987114310265, 0.061277229338884354, 0.029532674700021744, -0.011284676380455494, -0.025477563962340355, -0.014428411610424519, 0.012564699165523052, 0.03582014515995979, -0.02020387165248394, 0.06160491332411766, -0.008207502774894238, -0.043950848281383514, 0.0198147464543581, 0.03352634608745575, 0.01265686098486185, 0.012267733924090862, -0.007997579872608185, -0.020490597933530807, 0.02193446271121502, -0.00551945623010397, 0.014377210289239883, -0.02158629707992077, 0.030536212027072906, -0.011591882444918156, -0.013496555387973785, -0.01398808415979147, -0.010286259464919567, 0.0009939373703673482, -0.003008052473887801, -0.02521131932735443, 0.00474120257422328, 0.0012096210848540068, 0.025026995688676834, -9.424164454685524e-05, 0.01112083438783884, 0.004208713304251432, 0.024494506418704987, 0.022815117612481117, 0.015216905623674393, 0.003947588615119457, -0.01148948073387146, -0.05591137334704399, 0.047473467886447906, 0.06185067817568779, 0.011110593564808369, 0.007116924040019512, -0.0036890243645757437, 0.021012846380472183, -0.03192887827754021, 0.0009395363740622997, -0.011223236098885536, -0.03283001109957695, 0.017705269157886505, 0.014141686260700226, 
0.02832433395087719, -0.03524669632315636, 0.022815117612481117, -0.010803388431668282, 0.021135728806257248, 0.02863154001533985, -0.006625395733863115, -0.012298454530537128, -0.005204570945352316, 0.027464158833026886, 0.036270711570978165, 0.005877862684428692, 0.04337739571928978, 0.057426922023296356, -0.0076238131150603294, -0.0018624324584379792, -0.005703779403120279, -0.019743064418435097, 0.059556879103183746, -0.024494506418704987, -0.02818097174167633, -0.0359635055065155, -0.018145596608519554, 0.006650995928794146, 0.004362315870821476, -0.002106916857883334, -0.014326009899377823, 0.020869484171271324, 0.00018768326845020056, -0.01986594684422016, -0.024678830057382584, -0.014684416353702545, -0.008709271438419819, 0.009738409891724586, -0.003530301619321108, -0.0166812501847744, 0.009892012923955917, -0.019005771726369858, 0.015872277319431305, -0.01856544427573681, -0.00817166268825531, -0.021258611232042313, 0.0370284840464592, -0.0268907081335783, 0.04481102153658867, -0.012892385013401508, 0.0419028103351593, -0.051774341613054276, 0.0009952173568308353, 0.04423757269978523, 0.021258611232042313, -0.012605659663677216, 0.03065909445285797, 0.021033326163887978, 0.01985570602118969, -0.019435858353972435, -0.002831409452483058, -0.0029978123493492603, -0.04427853226661682, -0.003950148820877075, 0.0011648202780634165, 0.026870228350162506, -0.001858592382632196, -0.022753676399588585, 0.022466951981186867, -0.005186650436371565, 0.010035375133156776, -0.04517966881394386, 0.06574194878339767, -0.0051431297324597836, 0.047063861042261124, 0.05214298889040947, 0.00638987123966217, -0.039240363985300064, -0.03143734857439995, 0.024637870490550995, -0.03422267735004425, -0.010224818252027035, 0.045589275658130646, -0.013240550644695759, -0.0004217673558741808, 0.029635077342391014, -0.00687115965411067, 0.025129398331046104, 0.00804365985095501, 0.02451498806476593, -0.008376466110348701, -0.0023782814387232065, 0.01683485321700573, 0.012370135635137558, 0.02650158293545246, -0.03506237268447876, -0.02381865493953228, -0.0005033687921240926, 0.011407558806240559, 0.004651600960642099, -0.00990737322717905, -0.026112455874681473, -0.02099236659705639, 0.004933205898851156, 0.03901508077979088, 0.0013401834294199944, -0.014151927083730698, -0.0333625003695488, 0.04640848934650421, 0.009205920621752739, 0.03094581887125969, 0.003264056984335184, -0.026071494445204735, 0.018852168694138527, 0.02465835027396679, 0.012237013317644596, 0.0034663004335016012, -0.027402717620134354, -0.007209085859358311, -0.009190560318529606, -0.008176782168447971, -0.027771364897489548, 0.002693166956305504, 0.0702066645026207, -0.022405510768294334, -0.06353006511926651, 0.03995717689394951, -0.04046918451786041, -0.0492347776889801, -0.025784770026803017, -0.04837460443377495, -0.03381307050585747, -0.0039271083660423756, 0.013353193178772926, 0.004339275881648064, -0.020275553688406944, 0.06266989558935165, -0.03268665075302124, 0.0050637684762477875, -0.004106311593204737, -0.02090020477771759, 0.0425991415977478, -0.030085645616054535, 0.04235338047146797, 0.02119717001914978, 0.013793520629405975, -0.01633308455348015, -0.028590578585863113, -0.01782815158367157, 0.015472909435629845, -0.026112455874681473, 0.06140011176466942, 0.014418171718716621, 0.0824129581451416, -0.04210761561989784, -0.009810090996325016, 0.03045429103076458, -0.005196890793740749, 0.010414261370897293, 0.03174455463886261, 0.03784770146012306, -0.07372928410768509, 0.00563209829851985, 0.01000465452671051, 
0.018135355785489082, -0.007413889281451702, -0.038892198354005814, 0.021750139072537422, -0.0187702476978302, -0.03147830814123154, -0.049644384533166885, 0.07581828534603119, 0.02055203914642334, 0.026010053232312202, 0.0049127256497740746, 0.014817538671195507, -0.03244088590145111, -0.004838484339416027, -0.06045801565051079, 0.008407186716794968, -0.011806925758719444, 0.002859569853171706, 0.05161049962043762, 0.06066281720995903, -0.06926456838846207, 0.026276297867298126, -0.015677712857723236, -0.003386939177289605, -0.0044570378959178925, -0.046531371772289276, 0.00856590922921896, -0.022303108125925064, -0.008227983489632607, -0.015493390150368214, -0.04690001904964447, 0.003983429633080959, 0.02867249958217144, -0.0010368181392550468, -0.0045363991521298885, 0.0017062698025256395, 0.0016051479615271091, 0.0011558601399883628, -0.007229566108435392, 0.006482033059000969, 0.04550735279917717, -0.03199031949043274, 0.023347606882452965, 0.016957735642790794, 0.0008672151016071439, 0.002657326404005289, -0.013865201734006405, -0.03676224127411842, -0.018729286268353462, 0.03743809461593628, 0.013066467829048634, -0.04616272449493408, 0.046777136623859406, -0.022446472197771072, 0.007966859266161919, -0.02134053222835064, -0.01714205928146839, -0.007772295735776424, 0.03743809461593628, 0.026071494445204735, 0.0901135727763176, 0.008571029640734196, 0.0002102436701534316, 0.003084853757172823, 0.059597838670015335, 0.013240550644695759, 0.027853285893797874, 0.034447960555553436, 0.023654812946915627, 0.026583503931760788, 0.015821075066924095, -0.04046918451786041, -0.04603984206914902, -0.005135449580848217, 0.04509774595499039, 0.010158257558941841, 0.014305529184639454, -0.027791844680905342, -0.020971884950995445, -0.058164212852716446, 0.014991621486842632, -0.05423198640346527, -0.024781232699751854, -0.03844163194298744, 0.008611990138888359, -0.0031642152462154627, -0.02584621123969555, -0.08204431086778641, 0.006246509030461311, 0.005030487664043903, 0.03838019073009491, -0.032113201916217804, 0.02203686349093914, 0.04186185076832771, -0.013783280737698078, -0.0034995810128748417, -0.005806181114166975, 0.02818097174167633, -0.008089740760624409, -0.04341835901141167, -0.01732638292014599, -0.017705269157886505, -0.05644386261701584, -0.015964439138770103, 0.015012102201581001, 0.006722677033394575, 0.009948333725333214, 0.04218953475356102, 0.05820517614483833, 0.04694097861647606, 0.026030534878373146, -0.023654812946915627, -0.010516663081943989, 0.014520573429763317, -0.04829268157482147, 0.012626140378415585, 0.020080991089344025, -0.011755725368857384, 0.008253583684563637, -0.02381865493953228, 0.012011729180812836, -0.0015705873956903815, -0.026808787137269974, 0.025047477334737778, 0.01603611931204796, -0.002360361162573099, 0.006313070189207792, 0.027607521042227745, -0.008007819764316082, -0.009784490801393986, 0.001804831437766552, 0.03153974935412407, -0.056525785475969315, 0.005724259652197361, -0.021504374220967293, -0.011581641621887684, -0.0017830710858106613, -0.009118879213929176, 0.0008339345222339034, -0.009513125754892826, 0.04927574098110199, 0.016599329188466072, 0.04358220100402832, -0.0006348910974338651, -0.003113014390692115, 0.005381213966757059, -0.014244087971746922, -0.03608638793230057, -0.01856544427573681, 0.006313070189207792, -0.05136473476886749, -0.01970210298895836, -0.03362874686717987, -0.022446472197771072, 0.0576317235827446, -0.04431949183344841, -0.0436641201376915, -0.0021849980112165213, 0.008924315683543682, 
-0.028201451525092125, -0.0027981288731098175, 0.04235338047146797, -0.003909188322722912, 0.029245950281620026, 0.028733940795063972, 0.037417612969875336, 0.005606497637927532, 0.004183113109320402, 0.004213833715766668, 0.006394991651177406, 0.003141174791380763, -0.010255538858473301, -0.03455036133527756, 0.019599702209234238, 0.04354123771190643, 0.017049897462129593, -0.013568236492574215, 0.01190932746976614, 0.010700986720621586, -0.03180599585175514, 0.0026624463498592377, -0.006400111597031355, 0.05996648594737053, 0.013629677705466747, 0.0020390755962580442, 0.011929808184504509, -0.05472351610660553, -0.0569353923201561, 0.0225283931940794, -0.004899925552308559, 0.015923477709293365, -0.018299199640750885, -0.056771550327539444, -0.01986594684422016, 0.06279277801513672, 0.007444609887897968, -0.005831781774759293, 0.0036839041858911514, 0.005360733717679977, 0.00666123628616333, -0.023060882464051247, -0.025969093665480614, -0.009876652620732784, 0.0010195377981290221, 0.008673431351780891, -0.0253137219697237, -0.04218953475356102, 0.02476075105369091, -0.014797057956457138, 0.025539005175232887, -0.027709923684597015, 0.013936882838606834, 0.005785700865089893, 0.010368180461227894, -0.0006460912409238517, 0.005714019760489464, 0.01344535406678915, -0.006302829831838608, 0.012185812927782536, -0.0016345884650945663, 0.0309048593044281, -0.004649041220545769, 0.022671755403280258, -0.006533233914524317, 0.028652019798755646, -0.025887170806527138, 0.0092571210116148, -0.01698845624923706, 0.018084155395627022, 0.011161794885993004, 0.010388661175966263, 0.022446472197771072, -0.02639918029308319, -0.01643548719584942, 0.021852541714906693, -0.015565071254968643, 0.010311859659850597, -0.02207782492041588, -0.03278905153274536, 0.022016383707523346, -0.0009350563050247729, 0.00790029764175415, -0.0403258241713047, 0.045630235224962234, 0.005114969331771135, -0.008489107713103294, 0.01793055236339569, -0.021852541714906693, -0.001845792168751359, 0.024924594908952713, 0.04143176227807999, 0.006640756037086248, -0.014592254534363747, 0.030781976878643036, 0.029204988852143288, 0.004679761826992035, 0.007265406660735607, 0.004825684241950512, 0.022548872977495193, 0.03119158372282982, -0.0024730032309889793, -0.026010053232312202, 0.06570098549127579, -0.02451498806476593, -0.027361758053302765, 0.03360826522111893, -0.015339787118136883, -0.034591324627399445, 0.005975143983960152, -0.03524669632315636, 0.00684555945917964, 0.05255259573459625, 0.032809533178806305, -0.03348538279533386, -0.00364038348197937, 0.02392105758190155, 0.03129398450255394, -0.03598398715257645, -0.008560789749026299, -0.007567491848021746, -0.0017190700164064765, -0.025661887601017952, 0.009579687379300594, 0.04530255123972893, 0.028488175943493843, -0.01124371588230133, 0.03907652199268341, -0.0022566793486475945, 0.009164960123598576, 0.019876185804605484, 0.05017687380313873, 0.027894247323274612, -0.012370135635137558, -0.01876000687479973, -0.0022029185201972723, -0.02238503098487854, 0.04067398980259895, 0.025375163182616234, -0.03723328933119774, 0.03817538544535637, 0.013291751965880394, 0.013936882838606834, 0.01975330524146557, -0.005790820810943842, 0.00148226588498801, -0.03278905153274536, 0.004065351095050573, 0.00993297342211008, 0.004738642834126949, -0.010956991463899612, -0.005908582825213671, 0.012441816739737988, -0.009477285668253899, -0.041022155433893204, -0.051037050783634186, -0.025641407817602158, 0.008048780262470245, -0.0029542914126068354, 0.04415564984083176, 
0.0024294822942465544, -0.011079872958362103, 0.023859616369009018, -0.024453546851873398, -0.006246509030461311, -0.010793148539960384, -0.0309048593044281, -0.025477563962340355, 0.029041146859526634, 0.011212995275855064, -0.003747905371710658, 0.009415844455361366, 0.01599515974521637, 0.009001117199659348, -0.009922732599079609, -0.028099050745368004, -0.00941072404384613, 0.006154347211122513, 0.018708806484937668, 0.014735616743564606, -0.0012313814368098974, -0.00442119687795639, -0.04247625917196274, 0.0018880328861996531, -0.02818097174167633, 0.024822192266583443, 0.006092906463891268, -0.03252280876040459, 0.03502140939235687, 0.048948053270578384, -0.002961971564218402, -0.015964439138770103, -0.021156208589673042, -0.02088996395468712, -0.031416866928339005, 0.0026470862794667482, 0.01856544427573681, -0.023941537365317345, 0.021361012011766434, -0.01059858500957489, -0.006502513308078051, -0.005007447209209204, -0.0041703125461936, -0.05931111425161362, 0.00023616412363480777, 0.010875069536268711, -0.03366970643401146, -0.011643082834780216, -0.006681716535240412, -0.019282257184386253, 0.01185812707990408, -0.004495438188314438, -0.024986036121845245, -0.014653695747256279, -0.019937627017498016, 0.016271643340587616, -0.033997394144535065, -0.000680331839248538, 0.01089555025100708, -0.0074753304943442345, -0.06484080851078033, 0.01487897988408804, -0.0370284840464592, -0.029450753703713417, 0.019988829270005226, 0.005186650436371565, 0.011202755384147167, 0.0272388756275177, -0.008110221475362778, -0.019528020173311234, 0.07876745611429214, -0.05455967038869858, -0.007516290992498398, -0.02412586100399494, 0.009661608375608921, -0.0025792450178414583, -0.004874324891716242, -0.005831781774759293, 0.02064420096576214, -0.012257494032382965, 0.04317259415984154, -0.03237944468855858, 0.00173955038189888, -0.04780115187168121, -0.030679574236273766, 0.010337459854781628, 0.023450009524822235, -0.020582759752869606, 0.012298454530537128, 0.02318376488983631, -0.03432507812976837, 0.007838856428861618, 0.02074660174548626, 0.019149133935570717, -0.018145596608519554, -0.03950661048293114, -0.0006950521492399275, -0.029553154483437538, 0.02867249958217144, 0.016363805159926414, -0.006610035430639982, -0.029307391494512558, 0.038503073155879974, 0.01103891246020794, 0.022323589771986008, 0.007250046357512474, -0.029635077342391014, -0.032850492745637894, 0.0340588353574276, -0.02065443992614746, 0.04460621625185013, 0.04976726695895195, 0.010316980071365833, 0.01638428494334221, -0.007019642274826765, 0.02570284903049469, -0.01037330087274313, 0.02070564031600952, 0.0149301802739501, 0.006594675127416849, 0.012103891000151634, -0.03301433473825455, -0.005178970284759998, 0.00423431396484375, -0.007628933060914278, 0.025887170806527138, 0.03133494779467583, -0.04309067130088806, 0.039793334901332855, 0.006748277693986893, -0.0036275831516832113, 0.023593371734023094, 0.005616737995296717, 0.0040807113982737064, -0.01608731970191002, 0.048456523567438126, -0.04415564984083176, -0.012912864796817303, -0.012257494032382965, 0.02832433395087719, 0.002544684335589409, 0.02748463861644268, 0.02105380780994892, -0.02099236659705639, -0.009344163350760937, -0.00446727778762579, -0.03514429181814194, -0.006231148727238178, -0.0031488549429923296, -0.0235728919506073, -0.02709551341831684, -0.04091975465416908, -0.03618879243731499, 0.037909142673015594, 0.012738781981170177, -0.031273506581783295, 0.003896387992426753, 0.01768478751182556, 0.006236268673092127, 0.03174455463886261, 
-0.006118506658822298, 0.009272481314837933, -0.017305901274085045, -0.054150063544511795, -0.007219325751066208, 0.0130971884354949, 0.013455594889819622, -0.003136054612696171, -0.012482777237892151, -0.012267733924090862, -0.0032358963508158922, -0.02555948495864868, -0.0026496462523937225, 0.01738782413303852, 0.02891826443374157, 0.026603983715176582, 0.003233336377888918, -0.018882889300584793, -0.04792403429746628, 0.011049152351915836, -0.01391640305519104, 0.04354123771190643, -0.009415844455361366, -0.03663935884833336, -0.02242599055171013, 0.0333625003695488, 0.025928132236003876, -0.013936882838606834, 0.01758238673210144, 0.00643595214933157, -0.03418171778321266, 0.03358778730034828, -0.011714763939380646, -0.03254328668117523, 0.011694284155964851, -0.0198147464543581, -0.01185812707990408, 0.018104635179042816, 0.06660211831331253, -0.03397691249847412, -0.01044498197734356, 0.052020106464624405, -0.03205176070332527, 0.027177434414625168, -0.07282815128564835, 0.049152858555316925, -0.04825172200798988, -0.046531371772289276, 0.025825729593634605, 0.014674175530672073, -0.005345373414456844, 0.04010054096579552, -0.006789238192141056, 0.002626605797559023, -0.009451684542000294, 0.035328615456819534, -0.005401694215834141, -0.017008936032652855, -0.019732823595404625, -0.0006131306872703135, -0.030372370034456253, -0.009308322332799435, -0.029082106426358223, 0.017991993576288223, 0.0047488827258348465, 0.014428411610424519, 0.00817166268825531, 0.002288679825142026, 0.03964997082948685, 0.003059253329411149, 0.035287655889987946, -0.010409141890704632, 0.07528579235076904, 0.013537515886127949, 0.010496183298528194, -0.037110406905412674, -0.018903369084000587, -0.016568608582019806, 0.018360640853643417, 0.0232247244566679, -0.004882005043327808, 0.013936882838606834, -0.016005398705601692, -0.010158257558941841, -0.02207782492041588, 0.016896294429898262, 0.0011219395091757178, 0.0011974609224125743, -0.022507913410663605, 0.020377954468131065, -0.01649692840874195, 0.026563024148344994, 0.022303108125925064, 0.04546639323234558, -0.015196424908936024, -0.022548872977495193, -0.009886892512440681, 0.01367063820362091, -0.013271271251142025, -0.03977285325527191, 0.03950661048293114, 0.012554459273815155, 0.008555669337511063, -4.080071084899828e-05, 0.009461925365030766, 0.0007302527083083987, -0.000619530794210732, -0.0027827685698866844, -0.010588345117866993, 0.0004899285268038511, -0.012933345511555672, -0.002330920659005642, -0.00698380172252655, -0.013015267439186573, 0.03303481638431549, 0.032113201916217804, 0.01797151379287243, -0.020869484171271324, 0.010608824901282787, -0.000775053515098989, 0.0035635821986943483, -0.0031846954952925444, 0.04481102153658867, -0.04163656756281853, -0.06049897521734238, 0.03668031841516495, 0.010772667825222015, -0.015769874677062035, -0.009820330888032913, -0.017510704696178436, -0.012216532602906227, 0.023859616369009018, -0.03967045247554779, -0.07143548876047134, 0.008632470853626728, -0.01826847903430462, 0.0010144177358597517, -0.01220629271119833, -0.02034723572432995, 0.023347606882452965, 0.027771364897489548, -0.004183113109320402, -0.024945074692368507, -0.015053062699735165, -0.027300316840410233, 0.007654533721506596, 0.008739992044866085, -0.0034560603089630604, 0.021156208589673042, -0.017244460061192513, 0.0029491714667528868, -0.011960528790950775, -0.006446192506700754, -0.0019392338581383228, -0.01738782413303852, -0.01094675064086914, -0.038646433502435684, 0.02615341544151306, -0.012329175136983395, 
0.012810463085770607, 0.012257494032382965, 0.00544777512550354, -0.02015267126262188, 0.007884937338531017, -0.0041600726544857025, -0.00046176803880371153, -0.03491900861263275, -0.014715136960148811, 0.021258611232042313, -0.010557624511420727, -0.0007635332876816392, 0.02744367904961109, 0.03407931327819824, -0.005621857941150665, -0.05021783709526062, -0.016773412004113197, 0.010875069536268711, -0.03045429103076458, -0.04091975465416908, -0.0333625003695488, -0.004265034571290016, -0.010660026222467422, 0.001943073933944106, 0.014100725762546062, 0.041124556213617325, 0.007567491848021746, 0.028099050745368004, -0.032358963042497635, -0.003194935619831085, 0.031621672213077545, 0.02441258542239666, 0.011581641621887684, -0.0070964437909424305, 0.02189350128173828, -0.0016473886789754033, 0.01354775670915842, 0.014305529184639454, 0.012114130891859531, -0.019026251509785652, -0.008038540370762348, 0.006072425749152899, 0.034447960555553436, 0.01227797381579876, 0.04501582682132721, 0.008750232867896557, -0.014694656245410442, 0.030495252460241318, 0.035431016236543655, -0.03715136647224426, -0.04710482060909271, 0.020971884950995445, -0.020336994901299477, -0.01614876091480255, -0.008770712651312351, 0.015022342093288898, 0.03942468762397766, 0.007895177230238914, -0.015964439138770103, 0.059106308966875076, 0.023142803460359573, -0.012841183692216873, -0.013977843336760998, 0.011141314171254635, 0.0375404953956604, 0.03000372275710106, -0.027853285893797874, 0.02748463861644268, -0.018114876002073288, 0.013568236492574215, -0.0023949218448251486, -0.03856451436877251, -0.00035296616260893643, -0.0068660397082567215, -0.02006050944328308, 0.017838390544056892, 0.03491900861263275, 0.011223236098885536, 0.016466207802295685, -0.023388568311929703, 0.021033326163887978, 0.012830943800508976, -0.010547383688390255, -0.01148948073387146, 0.01044498197734356, 0.04169800877571106, -0.031621672213077545, -0.042517222464084625, -0.00288517028093338, -0.0021389173343777657, -0.020029788836836815, -0.017060138285160065, -0.011212995275855064, -0.010357940569519997, -0.005102168768644333, 0.017459504306316376, 0.016415005549788475, -0.017695028334856033, -0.0022464392241090536, -0.022999441251158714, 0.008028300479054451, -0.017295662313699722, -0.0039117480628192425, -0.02609197422862053, 0.0340588353574276, 0.013906162232160568, -0.018483523279428482, 0.0232247244566679, 0.03016756661236286, -0.008606869727373123, -0.027894247323274612, 0.016322845593094826, 0.04788307473063469, -0.004528719000518322, 0.02633773908019066, 0.008227983489632607, -0.00977937038987875, -0.0025434044655412436, -0.011212995275855064, 0.009820330888032913, -0.009451684542000294, -0.006743157748132944, -0.00517385033890605, 0.02787376567721367, 0.04522062838077545, -0.0051277694292366505, 0.004782163538038731, -0.0031181343365460634, 0.01265686098486185, -0.06258796900510788, -0.022016383707523346, 0.003717184765264392, -0.015923477709293365, -0.007086203433573246, -0.017408303916454315, 0.005319772753864527, 0.011776205152273178, 0.019333457574248314, 0.004185672849416733, 0.01882144808769226, 0.011458760127425194, -0.04431949183344841, -0.024146340787410736, 0.022344069555401802, 0.027156952768564224, -0.004167752806097269, 0.0035482218954712152, 0.04599888250231743, -0.0005158489802852273, -0.027054551988840103, 0.003891267813742161, 0.026665424928069115, -0.028652019798755646, 0.0003868867352139205, 0.013578476384282112, -0.022118786349892616, 0.004920405801385641, -0.0002334440650884062, -0.0031846954952925444, 
0.00881167408078909, 0.02312232367694378, -0.007219325751066208, 0.03420219570398331, 0.014674175530672073, -0.020336994901299477, 0.0037402252200990915, -0.01664029061794281, 0.000318565551424399, 0.024965554475784302, 0.0074702100828289986, 0.04530255123972893, 0.012585179880261421, 0.029778439551591873, -0.0001963234244612977, -0.011806925758719444, 0.008105101063847542, -0.008079500868916512, -0.0253137219697237, -0.005493856035172939, 0.03782721981406212, -0.0216477382928133, 0.03690560534596443, 0.03371066972613335, 0.012114130891859531, -0.02912306785583496, 0.009518246166408062, 0.0035405417438596487, 0.008048780262470245, -0.0038503070827573538, -0.01882144808769226, -0.040735431015491486, 0.06094954162836075, -0.01206293050199747, 0.03727424889802933, 0.0033613385166972876, -0.015329547226428986, 0.01369111891835928, -0.05501024052500725, -0.007936138659715652, 0.011653323657810688, 0.02713647298514843, 0.021709179505705833, -0.006420591846108437, -0.0016358685679733753, -0.014029044657945633, -0.01985570602118969, 0.0009254561155103147, 0.061277229338884354, -0.03795010223984718, 0.008658071048557758, -0.028058089315891266, -0.009748649783432484, -0.018401600420475006, -0.01590299792587757, -0.0272388756275177, 0.019005771726369858, 0.0067994785495102406, -0.021606776863336563, 0.0025958851911127567, -0.04804691672325134, -0.029327871277928352, 0.0005596897681243718, -0.024576429277658463, -0.01569819450378418, 0.050832245498895645, 0.005975143983960152, 0.03512381389737129, -0.03381307050585747, -0.02521131932735443, -0.0006137706805020571, -0.0037351050414144993, -0.011571401730179787, 0.002841649577021599, 0.004720722325146198, 0.0034150995779782534, 0.005621857941150665, 0.013496555387973785, -0.027894247323274612, 0.03799106180667877, -0.02684974856674671, 0.00792077835649252, 0.006830199155956507, 0.02684974856674671, 0.031969837844371796, 0.010567864403128624, -0.0034586202818900347, -0.03663935884833336, 0.01753118634223938, -0.00480520399287343, 0.05754980444908142, 0.021115249022841454, 0.039445169270038605, 0.010168497450649738, 0.03215416148304939, 0.00673291739076376, -0.029204988852143288, 0.007337087765336037, -0.03334202244877815, 0.032461367547512054, -0.010977471247315407, -0.03749953582882881, 0.026665424928069115, -0.03180599585175514, -0.006000744644552469, -0.004820564296096563, 0.03846210986375809, 0.008745112456381321, -0.025518525391817093, 0.010071215219795704, 0.03174455463886261, 0.02783280611038208, -0.016322845593094826, -0.014694656245410442, 0.03045429103076458, -0.012769502587616444, -0.04571215808391571, -0.0004825684300158173, 0.0038938280194997787, 0.03555389866232872, -0.006144107319414616, 2.1620377083308995e-05, -0.018954571336507797, -0.004265034571290016, -0.0062413886189460754, -0.020285794511437416, 0.013660398311913013, 0.03739713132381439, 0.014489852823317051, 0.027771364897489548, 0.023757213726639748, -0.009052317589521408, -0.02301992103457451, 0.00821774359792471, -0.017049897462129593, -0.012687581591308117, -0.008760472759604454, -0.016722211614251137, 0.015780115500092506, 0.0028800503350794315, -0.002380841411650181, -0.014418171718716621, 0.012646620161831379, 0.02381865493953228, -0.021422453224658966, -0.01039890106767416, 0.023736733943223953, -0.029143547639250755, 0.016875814646482468, -0.04931670054793358, -0.008929436095058918, 0.02179110050201416, -0.031724072992801666, 0.0432954765856266, 0.03571774438023567, 0.029696518555283546, -0.026726866140961647, 0.0006176107563078403, -0.04841556400060654, 
-0.002734127687290311, -0.030085645616054535, 0.029839880764484406, 0.010567864403128624, 0.02238503098487854, -0.03784770146012306, -0.008545429445803165, 0.015216905623674393, -0.004930646158754826, -0.021033326163887978, -0.01718301884829998, 0.022118786349892616, -0.012810463085770607, -0.03035189025104046, -0.01831967942416668, 0.006615155376493931, -0.005089368671178818, 0.009282722137868404, 0.011847886256873608, -0.03100726008415222, -0.023040402680635452, -0.033546824008226395, 0.020019549876451492, -0.023163283243775368, -0.009794730693101883, -0.0029696517158299685, -0.0130971884354949, -0.009958573617041111, 0.03237944468855858, 0.012370135635137558, -0.0056525785475969315, 0.02263079397380352, 0.0336901880800724, 0.006579314824193716, 0.017889592796564102, 0.018196796998381615, -0.03383354842662811, -0.013680879026651382, -0.012533978559076786, -0.022364549338817596, 0.009292962029576302, -0.008386706002056599, -0.008048780262470245, 0.0450567863881588, 0.028099050745368004, -0.016056600958108902, -0.047022901475429535, 0.02412586100399494, -0.002428202424198389, -0.0020301153417676687, 0.01643548719584942, 0.01927201636135578, 0.013773039914667606, -0.006922360509634018, -0.014182647690176964, 0.00935952365398407, -0.009989294223487377, 0.008094861172139645, -0.01737758331000805, -0.015216905623674393, 0.0033382982946932316, -0.02713647298514843, -0.030085645616054535, 0.025231800973415375, 0.013814001344144344, -0.023060882464051247, 0.04304971173405647, 0.00407303124666214, 0.0020173152443021536, 0.04980823025107384, 0.03782721981406212, 0.005596257746219635, 0.0309048593044281, -0.041677527129650116, -0.011438279412686825, -0.002384681487455964, -0.022507913410663605, -0.004208713304251432, -0.006528113968670368, 0.03506237268447876, 0.022098304703831673, 0.015206664800643921, 0.0019507539691403508, -0.010219697840511799, 0.041083596646785736, -0.00551945623010397, -0.04034630209207535, -0.01821727678179741, -0.0017805109964683652, -0.01857568323612213, 0.016312604770064354, -0.012400856241583824, 0.012994786724448204, -0.0062772296369075775, -0.0017817910993471742, 0.0007673733634874225, 0.023101842030882835, 0.019128654152154922, 0.015288586728274822, 0.03727424889802933, -0.009236641228199005, 0.023511450737714767, -0.019374417141079903, 0.022958479821681976, 0.00817166268825531, -0.007480450440198183, -0.012237013317644596, 0.015411469154059887, 0.015718674287199974, -0.006282349582761526, 0.03715136647224426, -0.03164215013384819, 0.0043879165314137936, -0.023634331300854683, 0.026603983715176582, 0.010639545507729053, 0.028713461011648178, -0.025375163182616234, -0.016824614256620407, 0.0074702100828289986, -0.00450823875144124, 0.012021970003843307, -0.007040122989565134, -0.015124743804335594, 0.02228262834250927, 0.02490411512553692, 0.012830943800508976, -0.024474026635289192, -0.01793055236339569, -0.02168869785964489, -0.06398063898086548, 0.028549617156386375, 0.021381493657827377, 0.006200428120791912, 0.014592254534363747, -0.026235338300466537, 0.003983429633080959, 0.0033510983921587467, 0.017070377245545387, -0.01005585491657257, 0.018483523279428482, -0.014377210289239883, -0.002320680534467101, 0.00647691311314702, -0.01403928454965353, 0.036864642053842545, -0.033403463661670685, -0.004759123083204031, 0.011950287967920303, -0.04882517084479332, 0.015831315889954567, 0.010153137147426605, -0.00735244806855917, 0.010178737342357635, 0.005591137334704399, -0.02079780213534832, 0.018657606095075607, 0.038748834282159805, -0.008320145308971405, 
-0.01976354420185089, -0.036967046558856964, 0.003315257839858532, -0.004794963635504246, 0.0333625003695488, -0.0070964437909424305, -0.044196609407663345, 0.027320796623826027, -0.029737478122115135, 0.025477563962340355, 0.04841556400060654, 0.009948333725333214, -0.0030208525713533163, -0.014766337350010872, -0.01426456868648529, -0.02521131932735443, 0.01683485321700573, -0.007183485198765993, -0.03020852617919445, 0.007035002578049898, 0.04362316057085991, -0.02119717001914978, 0.007562371902167797, 0.036270711570978165], index=0, object='embedding')], model='text-embedding-3-small', object='list', usage=Usage(prompt_tokens=7, total_tokens=7))\n", + "response_text: EmbedderOutput(data=[Embedding(embedding=[0.04169800877571106, 0.0158005952835083, 0.028160491958260536, 0.024351144209504128, -0.023142803460359573, -0.002739247865974903, -0.014223608188331127, 0.01433624979108572, 0.010834109038114548, -0.010199218057096004, 0.006942841224372387, -0.024043940007686615, -0.06164587661623955, -0.01508378330618143, -0.014233848080039024, 0.023163283243775368, -0.006625395733863115, 0.019446099177002907, 0.07241854071617126, -0.024392105638980865, 0.003002932295203209, -0.010091695934534073, -0.04100167378783226, 0.011970768682658672, 0.06209644302725792, 0.0070964437909424305, -0.04554831609129906, -0.007347328122705221, 0.00364038348197937, 0.03942468762397766, 0.04214857518672943, -0.0251498781144619, -0.0019558740314096212, 0.04309067130088806, -0.024535467848181725, -0.03995717689394951, -0.03764289617538452, -0.039342764765024185, 0.021320052444934845, 0.029676036909222603, -0.003136054612696171, -0.01302550733089447, 0.00684555945917964, 0.013230310752987862, -0.027320796623826027, -0.030679574236273766, -0.009221280924975872, -0.039936695247888565, -0.03360826522111893, 0.02748463861644268, 0.03883075714111328, 0.004044870380312204, 0.03252280876040459, 0.03262520954012871, -0.016814373433589935, 0.004218953661620617, 0.024678830057382584, 0.009641128592193127, 0.04665425419807434, 0.015544591471552849, 0.036127351224422455, -0.010265778750181198, 0.026358218863606453, 0.0043085552752017975, 0.0005580897559411824, 0.0354514978826046, -0.0039322287775576115, 0.03788866102695465, 0.05906534940004349, 0.04612176492810249, -0.011059393174946308, 0.016312604770064354, -0.00918543990701437, 0.004631120711565018, -0.006594675127416849, -0.018145596608519554, -0.003968069329857826, -0.0059649040922522545, -0.03207223862409592, -0.031867437064647675, -0.036168310791254044, 0.0010604985291138291, -0.01807391457259655, -0.008606869727373123, 0.008248464204370975, -0.044647179543972015, 0.026767827570438385, 0.03383354842662811, -0.022917520254850388, -0.04767827317118645, 0.0033997392747551203, 0.011141314171254635, -0.025928132236003876, 0.027115993201732635, -0.010388661175966263, 0.01921057514846325, 0.03549245744943619, 0.0011750605190172791, -0.06819958984851837, 0.000605450535658747, 0.019323216751217842, -0.023982498794794083, -0.031109662726521492, 0.026972630992531776, 0.02560044638812542, 0.040182460099458694, 0.015862036496400833, -0.004974166862666607, 0.003153975121676922, -0.03852355107665062, -0.025661887601017952, 0.011212995275855064, 0.0033536585979163647, 0.02431018464267254, -0.04812883958220482, -0.029102588072419167, -0.023859616369009018, -0.02416682057082653, 0.02902066521346569, -0.02574380859732628, 0.033157698810100555, 0.052511636167764664, -0.04718674346804619, 0.010337459854781628, 0.010752187110483646, -0.013424874283373356, -0.0027725284453481436, 
-0.002777648391202092, 0.03491900861263275, -0.03870787471532822, 0.01074194721877575, -0.02752560004591942, 0.024535467848181725, 0.033055298030376434, 0.031232545152306557, 0.01897505111992359, 0.026952149346470833, -0.016937255859375, -0.018544962629675865, 0.010782907716929913, 0.007931018248200417, 0.013189350254833698, 0.021668218076229095, 0.003315257839858532, -0.02668590471148491, -0.01458201464265585, -0.04143176227807999, 0.040530625730752945, 0.01154068112373352, -0.042312417179346085, 0.040428224951028824, -0.02312232367694378, -0.0038989479653537273, 0.01604636013507843, -0.0056525785475969315, -0.036721281707286835, -0.008970396593213081, 0.019824985414743423, 0.0059649040922522545, 0.04341835901141167, -0.03878979757428169, 0.04927574098110199, -0.03719232976436615, -0.006026345305144787, 0.012257494032382965, 0.03287097439169884, -0.03643455356359482, -0.02140197344124317, 0.00695820152759552, -0.005381213966757059, -0.02461738884449005, 0.004137032199651003, 0.054354868829250336, 0.021156208589673042, 0.03006516396999359, -0.024392105638980865, -0.04943958297371864, 0.0406944714486599, 0.0003852867230307311, -0.01936417818069458, -0.028344813734292984, -0.02803760953247547, 0.011735244654119015, 0.013045987114310265, 0.061277229338884354, 0.029532674700021744, -0.011284676380455494, -0.025477563962340355, -0.014428411610424519, 0.012564699165523052, 0.03582014515995979, -0.02020387165248394, 0.06160491332411766, -0.008207502774894238, -0.043950848281383514, 0.0198147464543581, 0.03352634608745575, 0.01265686098486185, 0.012267733924090862, -0.007997579872608185, -0.020490597933530807, 0.02193446271121502, -0.00551945623010397, 0.014377210289239883, -0.02158629707992077, 0.030536212027072906, -0.011591882444918156, -0.013496555387973785, -0.01398808415979147, -0.010286259464919567, 0.0009939373703673482, -0.003008052473887801, -0.02521131932735443, 0.00474120257422328, 0.0012096210848540068, 0.025026995688676834, -9.424164454685524e-05, 0.01112083438783884, 0.004208713304251432, 0.024494506418704987, 0.022815117612481117, 0.015216905623674393, 0.003947588615119457, -0.01148948073387146, -0.05591137334704399, 0.047473467886447906, 0.06185067817568779, 0.011110593564808369, 0.007116924040019512, -0.0036890243645757437, 0.021012846380472183, -0.03192887827754021, 0.0009395363740622997, -0.011223236098885536, -0.03283001109957695, 0.017705269157886505, 0.014141686260700226, 0.02832433395087719, -0.03524669632315636, 0.022815117612481117, -0.010803388431668282, 0.021135728806257248, 0.02863154001533985, -0.006625395733863115, -0.012298454530537128, -0.005204570945352316, 0.027464158833026886, 0.036270711570978165, 0.005877862684428692, 0.04337739571928978, 0.057426922023296356, -0.0076238131150603294, -0.0018624324584379792, -0.005703779403120279, -0.019743064418435097, 0.059556879103183746, -0.024494506418704987, -0.02818097174167633, -0.0359635055065155, -0.018145596608519554, 0.006650995928794146, 0.004362315870821476, -0.002106916857883334, -0.014326009899377823, 0.020869484171271324, 0.00018768326845020056, -0.01986594684422016, -0.024678830057382584, -0.014684416353702545, -0.008709271438419819, 0.009738409891724586, -0.003530301619321108, -0.0166812501847744, 0.009892012923955917, -0.019005771726369858, 0.015872277319431305, -0.01856544427573681, -0.00817166268825531, -0.021258611232042313, 0.0370284840464592, -0.0268907081335783, 0.04481102153658867, -0.012892385013401508, 0.0419028103351593, -0.051774341613054276, 0.0009952173568308353, 0.04423757269978523, 
0.021258611232042313, -0.012605659663677216, 0.03065909445285797, 0.021033326163887978, 0.01985570602118969, -0.019435858353972435, -0.002831409452483058, -0.0029978123493492603, -0.04427853226661682, -0.003950148820877075, 0.0011648202780634165, 0.026870228350162506, -0.001858592382632196, -0.022753676399588585, 0.022466951981186867, -0.005186650436371565, 0.010035375133156776, -0.04517966881394386, 0.06574194878339767, -0.0051431297324597836, 0.047063861042261124, 0.05214298889040947, 0.00638987123966217, -0.039240363985300064, -0.03143734857439995, 0.024637870490550995, -0.03422267735004425, -0.010224818252027035, 0.045589275658130646, -0.013240550644695759, -0.0004217673558741808, 0.029635077342391014, -0.00687115965411067, 0.025129398331046104, 0.00804365985095501, 0.02451498806476593, -0.008376466110348701, -0.0023782814387232065, 0.01683485321700573, 0.012370135635137558, 0.02650158293545246, -0.03506237268447876, -0.02381865493953228, -0.0005033687921240926, 0.011407558806240559, 0.004651600960642099, -0.00990737322717905, -0.026112455874681473, -0.02099236659705639, 0.004933205898851156, 0.03901508077979088, 0.0013401834294199944, -0.014151927083730698, -0.0333625003695488, 0.04640848934650421, 0.009205920621752739, 0.03094581887125969, 0.003264056984335184, -0.026071494445204735, 0.018852168694138527, 0.02465835027396679, 0.012237013317644596, 0.0034663004335016012, -0.027402717620134354, -0.007209085859358311, -0.009190560318529606, -0.008176782168447971, -0.027771364897489548, 0.002693166956305504, 0.0702066645026207, -0.022405510768294334, -0.06353006511926651, 0.03995717689394951, -0.04046918451786041, -0.0492347776889801, -0.025784770026803017, -0.04837460443377495, -0.03381307050585747, -0.0039271083660423756, 0.013353193178772926, 0.004339275881648064, -0.020275553688406944, 0.06266989558935165, -0.03268665075302124, 0.0050637684762477875, -0.004106311593204737, -0.02090020477771759, 0.0425991415977478, -0.030085645616054535, 0.04235338047146797, 0.02119717001914978, 0.013793520629405975, -0.01633308455348015, -0.028590578585863113, -0.01782815158367157, 0.015472909435629845, -0.026112455874681473, 0.06140011176466942, 0.014418171718716621, 0.0824129581451416, -0.04210761561989784, -0.009810090996325016, 0.03045429103076458, -0.005196890793740749, 0.010414261370897293, 0.03174455463886261, 0.03784770146012306, -0.07372928410768509, 0.00563209829851985, 0.01000465452671051, 0.018135355785489082, -0.007413889281451702, -0.038892198354005814, 0.021750139072537422, -0.0187702476978302, -0.03147830814123154, -0.049644384533166885, 0.07581828534603119, 0.02055203914642334, 0.026010053232312202, 0.0049127256497740746, 0.014817538671195507, -0.03244088590145111, -0.004838484339416027, -0.06045801565051079, 0.008407186716794968, -0.011806925758719444, 0.002859569853171706, 0.05161049962043762, 0.06066281720995903, -0.06926456838846207, 0.026276297867298126, -0.015677712857723236, -0.003386939177289605, -0.0044570378959178925, -0.046531371772289276, 0.00856590922921896, -0.022303108125925064, -0.008227983489632607, -0.015493390150368214, -0.04690001904964447, 0.003983429633080959, 0.02867249958217144, -0.0010368181392550468, -0.0045363991521298885, 0.0017062698025256395, 0.0016051479615271091, 0.0011558601399883628, -0.007229566108435392, 0.006482033059000969, 0.04550735279917717, -0.03199031949043274, 0.023347606882452965, 0.016957735642790794, 0.0008672151016071439, 0.002657326404005289, -0.013865201734006405, -0.03676224127411842, -0.018729286268353462, 0.03743809461593628, 
0.013066467829048634, -0.04616272449493408, 0.046777136623859406, -0.022446472197771072, 0.007966859266161919, -0.02134053222835064, -0.01714205928146839, -0.007772295735776424, 0.03743809461593628, 0.026071494445204735, 0.0901135727763176, 0.008571029640734196, 0.0002102436701534316, 0.003084853757172823, 0.059597838670015335, 0.013240550644695759, 0.027853285893797874, 0.034447960555553436, 0.023654812946915627, 0.026583503931760788, 0.015821075066924095, -0.04046918451786041, -0.04603984206914902, -0.005135449580848217, 0.04509774595499039, 0.010158257558941841, 0.014305529184639454, -0.027791844680905342, -0.020971884950995445, -0.058164212852716446, 0.014991621486842632, -0.05423198640346527, -0.024781232699751854, -0.03844163194298744, 0.008611990138888359, -0.0031642152462154627, -0.02584621123969555, -0.08204431086778641, 0.006246509030461311, 0.005030487664043903, 0.03838019073009491, -0.032113201916217804, 0.02203686349093914, 0.04186185076832771, -0.013783280737698078, -0.0034995810128748417, -0.005806181114166975, 0.02818097174167633, -0.008089740760624409, -0.04341835901141167, -0.01732638292014599, -0.017705269157886505, -0.05644386261701584, -0.015964439138770103, 0.015012102201581001, 0.006722677033394575, 0.009948333725333214, 0.04218953475356102, 0.05820517614483833, 0.04694097861647606, 0.026030534878373146, -0.023654812946915627, -0.010516663081943989, 0.014520573429763317, -0.04829268157482147, 0.012626140378415585, 0.020080991089344025, -0.011755725368857384, 0.008253583684563637, -0.02381865493953228, 0.012011729180812836, -0.0015705873956903815, -0.026808787137269974, 0.025047477334737778, 0.01603611931204796, -0.002360361162573099, 0.006313070189207792, 0.027607521042227745, -0.008007819764316082, -0.009784490801393986, 0.001804831437766552, 0.03153974935412407, -0.056525785475969315, 0.005724259652197361, -0.021504374220967293, -0.011581641621887684, -0.0017830710858106613, -0.009118879213929176, 0.0008339345222339034, -0.009513125754892826, 0.04927574098110199, 0.016599329188466072, 0.04358220100402832, -0.0006348910974338651, -0.003113014390692115, 0.005381213966757059, -0.014244087971746922, -0.03608638793230057, -0.01856544427573681, 0.006313070189207792, -0.05136473476886749, -0.01970210298895836, -0.03362874686717987, -0.022446472197771072, 0.0576317235827446, -0.04431949183344841, -0.0436641201376915, -0.0021849980112165213, 0.008924315683543682, -0.028201451525092125, -0.0027981288731098175, 0.04235338047146797, -0.003909188322722912, 0.029245950281620026, 0.028733940795063972, 0.037417612969875336, 0.005606497637927532, 0.004183113109320402, 0.004213833715766668, 0.006394991651177406, 0.003141174791380763, -0.010255538858473301, -0.03455036133527756, 0.019599702209234238, 0.04354123771190643, 0.017049897462129593, -0.013568236492574215, 0.01190932746976614, 0.010700986720621586, -0.03180599585175514, 0.0026624463498592377, -0.006400111597031355, 0.05996648594737053, 0.013629677705466747, 0.0020390755962580442, 0.011929808184504509, -0.05472351610660553, -0.0569353923201561, 0.0225283931940794, -0.004899925552308559, 0.015923477709293365, -0.018299199640750885, -0.056771550327539444, -0.01986594684422016, 0.06279277801513672, 0.007444609887897968, -0.005831781774759293, 0.0036839041858911514, 0.005360733717679977, 0.00666123628616333, -0.023060882464051247, -0.025969093665480614, -0.009876652620732784, 0.0010195377981290221, 0.008673431351780891, -0.0253137219697237, -0.04218953475356102, 0.02476075105369091, -0.014797057956457138, 0.025539005175232887, 
-0.027709923684597015, 0.013936882838606834, 0.005785700865089893, 0.010368180461227894, -0.0006460912409238517, 0.005714019760489464, 0.01344535406678915, -0.006302829831838608, 0.012185812927782536, -0.0016345884650945663, 0.0309048593044281, -0.004649041220545769, 0.022671755403280258, -0.006533233914524317, 0.028652019798755646, -0.025887170806527138, 0.0092571210116148, -0.01698845624923706, 0.018084155395627022, 0.011161794885993004, 0.010388661175966263, 0.022446472197771072, -0.02639918029308319, -0.01643548719584942, 0.021852541714906693, -0.015565071254968643, 0.010311859659850597, -0.02207782492041588, -0.03278905153274536, 0.022016383707523346, -0.0009350563050247729, 0.00790029764175415, -0.0403258241713047, 0.045630235224962234, 0.005114969331771135, -0.008489107713103294, 0.01793055236339569, -0.021852541714906693, -0.001845792168751359, 0.024924594908952713, 0.04143176227807999, 0.006640756037086248, -0.014592254534363747, 0.030781976878643036, 0.029204988852143288, 0.004679761826992035, 0.007265406660735607, 0.004825684241950512, 0.022548872977495193, 0.03119158372282982, -0.0024730032309889793, -0.026010053232312202, 0.06570098549127579, -0.02451498806476593, -0.027361758053302765, 0.03360826522111893, -0.015339787118136883, -0.034591324627399445, 0.005975143983960152, -0.03524669632315636, 0.00684555945917964, 0.05255259573459625, 0.032809533178806305, -0.03348538279533386, -0.00364038348197937, 0.02392105758190155, 0.03129398450255394, -0.03598398715257645, -0.008560789749026299, -0.007567491848021746, -0.0017190700164064765, -0.025661887601017952, 0.009579687379300594, 0.04530255123972893, 0.028488175943493843, -0.01124371588230133, 0.03907652199268341, -0.0022566793486475945, 0.009164960123598576, 0.019876185804605484, 0.05017687380313873, 0.027894247323274612, -0.012370135635137558, -0.01876000687479973, -0.0022029185201972723, -0.02238503098487854, 0.04067398980259895, 0.025375163182616234, -0.03723328933119774, 0.03817538544535637, 0.013291751965880394, 0.013936882838606834, 0.01975330524146557, -0.005790820810943842, 0.00148226588498801, -0.03278905153274536, 0.004065351095050573, 0.00993297342211008, 0.004738642834126949, -0.010956991463899612, -0.005908582825213671, 0.012441816739737988, -0.009477285668253899, -0.041022155433893204, -0.051037050783634186, -0.025641407817602158, 0.008048780262470245, -0.0029542914126068354, 0.04415564984083176, 0.0024294822942465544, -0.011079872958362103, 0.023859616369009018, -0.024453546851873398, -0.006246509030461311, -0.010793148539960384, -0.0309048593044281, -0.025477563962340355, 0.029041146859526634, 0.011212995275855064, -0.003747905371710658, 0.009415844455361366, 0.01599515974521637, 0.009001117199659348, -0.009922732599079609, -0.028099050745368004, -0.00941072404384613, 0.006154347211122513, 0.018708806484937668, 0.014735616743564606, -0.0012313814368098974, -0.00442119687795639, -0.04247625917196274, 0.0018880328861996531, -0.02818097174167633, 0.024822192266583443, 0.006092906463891268, -0.03252280876040459, 0.03502140939235687, 0.048948053270578384, -0.002961971564218402, -0.015964439138770103, -0.021156208589673042, -0.02088996395468712, -0.031416866928339005, 0.0026470862794667482, 0.01856544427573681, -0.023941537365317345, 0.021361012011766434, -0.01059858500957489, -0.006502513308078051, -0.005007447209209204, -0.0041703125461936, -0.05931111425161362, 0.00023616412363480777, 0.010875069536268711, -0.03366970643401146, -0.011643082834780216, -0.006681716535240412, -0.019282257184386253, 0.01185812707990408, 
-0.004495438188314438, -0.024986036121845245, -0.014653695747256279, -0.019937627017498016, 0.016271643340587616, -0.033997394144535065, -0.000680331839248538, 0.01089555025100708, -0.0074753304943442345, -0.06484080851078033, 0.01487897988408804, -0.0370284840464592, -0.029450753703713417, 0.019988829270005226, 0.005186650436371565, 0.011202755384147167, 0.0272388756275177, -0.008110221475362778, -0.019528020173311234, 0.07876745611429214, -0.05455967038869858, -0.007516290992498398, -0.02412586100399494, 0.009661608375608921, -0.0025792450178414583, -0.004874324891716242, -0.005831781774759293, 0.02064420096576214, -0.012257494032382965, 0.04317259415984154, -0.03237944468855858, 0.00173955038189888, -0.04780115187168121, -0.030679574236273766, 0.010337459854781628, 0.023450009524822235, -0.020582759752869606, 0.012298454530537128, 0.02318376488983631, -0.03432507812976837, 0.007838856428861618, 0.02074660174548626, 0.019149133935570717, -0.018145596608519554, -0.03950661048293114, -0.0006950521492399275, -0.029553154483437538, 0.02867249958217144, 0.016363805159926414, -0.006610035430639982, -0.029307391494512558, 0.038503073155879974, 0.01103891246020794, 0.022323589771986008, 0.007250046357512474, -0.029635077342391014, -0.032850492745637894, 0.0340588353574276, -0.02065443992614746, 0.04460621625185013, 0.04976726695895195, 0.010316980071365833, 0.01638428494334221, -0.007019642274826765, 0.02570284903049469, -0.01037330087274313, 0.02070564031600952, 0.0149301802739501, 0.006594675127416849, 0.012103891000151634, -0.03301433473825455, -0.005178970284759998, 0.00423431396484375, -0.007628933060914278, 0.025887170806527138, 0.03133494779467583, -0.04309067130088806, 0.039793334901332855, 0.006748277693986893, -0.0036275831516832113, 0.023593371734023094, 0.005616737995296717, 0.0040807113982737064, -0.01608731970191002, 0.048456523567438126, -0.04415564984083176, -0.012912864796817303, -0.012257494032382965, 0.02832433395087719, 0.002544684335589409, 0.02748463861644268, 0.02105380780994892, -0.02099236659705639, -0.009344163350760937, -0.00446727778762579, -0.03514429181814194, -0.006231148727238178, -0.0031488549429923296, -0.0235728919506073, -0.02709551341831684, -0.04091975465416908, -0.03618879243731499, 0.037909142673015594, 0.012738781981170177, -0.031273506581783295, 0.003896387992426753, 0.01768478751182556, 0.006236268673092127, 0.03174455463886261, -0.006118506658822298, 0.009272481314837933, -0.017305901274085045, -0.054150063544511795, -0.007219325751066208, 0.0130971884354949, 0.013455594889819622, -0.003136054612696171, -0.012482777237892151, -0.012267733924090862, -0.0032358963508158922, -0.02555948495864868, -0.0026496462523937225, 0.01738782413303852, 0.02891826443374157, 0.026603983715176582, 0.003233336377888918, -0.018882889300584793, -0.04792403429746628, 0.011049152351915836, -0.01391640305519104, 0.04354123771190643, -0.009415844455361366, -0.03663935884833336, -0.02242599055171013, 0.0333625003695488, 0.025928132236003876, -0.013936882838606834, 0.01758238673210144, 0.00643595214933157, -0.03418171778321266, 0.03358778730034828, -0.011714763939380646, -0.03254328668117523, 0.011694284155964851, -0.0198147464543581, -0.01185812707990408, 0.018104635179042816, 0.06660211831331253, -0.03397691249847412, -0.01044498197734356, 0.052020106464624405, -0.03205176070332527, 0.027177434414625168, -0.07282815128564835, 0.049152858555316925, -0.04825172200798988, -0.046531371772289276, 0.025825729593634605, 0.014674175530672073, -0.005345373414456844, 
… (embedding vector values truncated) …, 0.036270711570978165], index=0)], model='text-embedding-3-small', usage=Usage(prompt_tokens=7, total_tokens=7), error=None, raw_response=None, input=None)\n" + ] + } + ], + "source": [ + "build_custom_model_client()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BLAF5qTEmoyW" + }, + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or 
[Discord](https://discord.gg/ezzszrRZvT)." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/tutorials/adalflow_text_splitter.ipynb b/notebooks/tutorials/adalflow_text_splitter.ipynb new file mode 100644 index 00000000..66fb81c7 --- /dev/null +++ b/notebooks/tutorials/adalflow_text_splitter.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A99Pp0T7A9BM" + }, + "outputs": [], + "source": [ + "!pip install adalflow[openai,groq,faiss-cpu]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y2SVUBNeBMy5" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Alternatively, you can set OPENAI_API_KEY in your project's .env file\n", + "# and load it with adalflow's setup_env utility:\n", + "# from adalflow.utils import setup_env\n", + "\n", + "# Prompt the user to enter their API key securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "# Set the environment variable\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "\n", + "print(\"API key has been set.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RWWG9WRt2r9L", + "outputId": "faad52a8-47f5-48bc-e2c3-17a5aea21254" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Splitting Documents in Batches: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:00<00:00, 788.85it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document(id=6374a3e5-2ef9-40ba-a7b3-e18c2b466390, text='Example text. More example text. ', meta_data=None, vector=[], parent_doc_id=doc1, order=0, score=None)\n", + "Document(id=b46045ba-3ebb-4e66-93d5-ece2d6ace3de, text='text. Even more text to ', meta_data=None, vector=[], parent_doc_id=doc1, order=1, score=None)\n", + "Document(id=eba5555b-e6d6-4ca1-8452-af22295e68f8, text='to illustrate.', meta_data=None, vector=[], parent_doc_id=doc1, order=2, score=None)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from adalflow.components.data_process.text_splitter import TextSplitter\n", + "from adalflow.core.types import Document\n", + "\n", + "# Configure the splitter settings\n", + "text_splitter = TextSplitter(\n", + "    split_by=\"word\",\n", + "    chunk_size=5,\n", + "    chunk_overlap=1\n", + ")\n", + "\n", + "# Example document\n", + "doc = Document(\n", + "    text=\"Example text. More example text. 
Even more text to illustrate.\",\n", + "    id=\"doc1\"\n", + ")\n", + "\n", + "# Execute the splitting\n", + "splitted_docs = text_splitter.call(documents=[doc])\n", + "\n", + "for doc in splitted_docs:\n", + "    print(doc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LioyB3eCAOs8", + "outputId": "11cddc1c-608a-4027-830f-fe30a882a766" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Splitting Documents in Batches: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:00<00:00, 489.02it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document(id=b0c308f2-73d2-44cf-aaf2-63e8f87198e4, text='Example text. More example', meta_data=None, vector=[], parent_doc_id=doc1, order=0, score=None)\n", + "Document(id=3a37adff-c8ac-4cff-8b5e-9c68e0de9772, text=' text. Even more text', meta_data=None, vector=[], parent_doc_id=doc1, order=1, score=None)\n", + "Document(id=e1b56768-7918-4a94-8f08-a01161cb2dcf, text=' to illustrate.', meta_data=None, vector=[], parent_doc_id=doc1, order=2, score=None)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from adalflow.components.data_process.text_splitter import TextSplitter\n", + "from adalflow.core.types import Document\n", + "\n", + "# Configure the splitter settings\n", + "text_splitter = TextSplitter(\n", + "    split_by=\"token\",\n", + "    chunk_size=5,\n", + "    chunk_overlap=0\n", + ")\n", + "\n", + "doc = Document(\n", + "    text=\"Example text. More example text. Even more text to illustrate.\",\n", + "    id=\"doc1\"\n", + ")\n", + "\n", + "splitted_docs = text_splitter.call(documents=[doc])\n", + "\n", + "for doc in splitted_docs:\n", + "    print(doc)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/poetry.lock b/poetry.lock index 09358530..edc2b949 100644 --- a/poetry.lock +++ b/poetry.lock @@ -44,8 +44,8 @@ testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", [[package]] name = "adalflow" -version = "0.2.2" -description = "The Library to Build and Auto-optimize Any LLM Task Pipeline" +version = "0.2.5" +description = "The Library to Build and Auto-optimize LLM Applications" optional = false python-versions = ">=3.9, <4.0" files = [] develop = true [package.dependencies] backoff = "^2.2.1" botocore = "^1.34.149" +colorama = "^0.4.6" diskcache = "^5.6.3" jinja2 = "^3.1.3" jsonlines = "^4.0.0" @@ -469,6 +470,50 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "black" +version = "24.10.0" +description = "The uncompromising code formatter." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"}, + {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"}, + {file = "black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f"}, + {file = "black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e"}, + {file = "black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad"}, + {file = "black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50"}, + {file = "black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392"}, + {file = "black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175"}, + {file = "black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3"}, + {file = "black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65"}, + {file = "black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f"}, + {file = "black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8"}, + {file = "black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981"}, + {file = "black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b"}, + {file = "black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2"}, + {file = "black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b"}, + {file = "black-24.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd"}, + {file = "black-24.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f"}, + {file = "black-24.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800"}, + {file = "black-24.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7"}, + {file = "black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d"}, + {file = "black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" + +[package.extras] +colorama = 
["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.10)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "bleach" version = "6.1.0" @@ -525,6 +570,17 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version > [package.extras] crt = ["awscrt (==0.21.2)"] +[[package]] +name = "cachetools" +version = "5.5.0" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292"}, + {file = "cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a"}, +] + [[package]] name = "certifi" version = "2024.7.4" @@ -1460,6 +1516,148 @@ tqdm = "*" [package.extras] test = ["build", "mypy", "pytest", "pytest-xdist", "ruff", "twine", "types-requests", "types-setuptools"] +[[package]] +name = "google-ai-generativelanguage" +version = "0.6.10" +description = "Google Ai Generativelanguage API client library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google_ai_generativelanguage-0.6.10-py3-none-any.whl", hash = "sha256:854a2bf833d18be05ad5ef13c755567b66a4f4a870f099b62c61fe11bddabcf4"}, + {file = "google_ai_generativelanguage-0.6.10.tar.gz", hash = "sha256:6fa642c964d8728006fe7e8771026fc0b599ae0ebeaf83caf550941e8e693455"}, +] + +[package.dependencies] +google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" +proto-plus = ">=1.22.3,<2.0.0dev" +protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" + +[[package]] +name = "google-api-core" +version = "2.23.0" +description = "Google API client core library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google_api_core-2.23.0-py3-none-any.whl", hash = "sha256:c20100d4c4c41070cf365f1d8ddf5365915291b5eb11b83829fbd1c999b5122f"}, + {file = "google_api_core-2.23.0.tar.gz", hash = "sha256:2ceb087315e6af43f256704b871d99326b1f12a9d6ce99beaedec99ba26a0ace"}, +] + +[package.dependencies] +google-auth = ">=2.14.1,<3.0.dev0" +googleapis-common-protos = ">=1.56.2,<2.0.dev0" +grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} +grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} +proto-plus = [ + {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, + {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, +] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" +requests = ">=2.18.0,<3.0.0.dev0" + +[package.extras] +async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] + +[[package]] +name = "google-api-python-client" +version = "2.154.0" +description = "Google API Client Library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"google_api_python_client-2.154.0-py2.py3-none-any.whl", hash = "sha256:a521bbbb2ec0ba9d6f307cdd64ed6e21eeac372d1bd7493a4ab5022941f784ad"}, + {file = "google_api_python_client-2.154.0.tar.gz", hash = "sha256:1b420062e03bfcaa1c79e2e00a612d29a6a934151ceb3d272fe150a656dc8f17"}, +] + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" +google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0.dev0" +google-auth-httplib2 = ">=0.2.0,<1.0.0" +httplib2 = ">=0.19.0,<1.dev0" +uritemplate = ">=3.0.1,<5" + +[[package]] +name = "google-auth" +version = "2.36.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google_auth-2.36.0-py2.py3-none-any.whl", hash = "sha256:51a15d47028b66fd36e5c64a82d2d57480075bccc7da37cde257fc94177a61fb"}, + {file = "google_auth-2.36.0.tar.gz", hash = "sha256:545e9618f2df0bcbb7dcbc45a546485b1212624716975a1ea5ae8149ce769ab1"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography", "pyopenssl"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + +[[package]] +name = "google-auth-httplib2" +version = "0.2.0" +description = "Google Authentication Library: httplib2 transport" +optional = false +python-versions = "*" +files = [ + {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"}, + {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"}, +] + +[package.dependencies] +google-auth = "*" +httplib2 = ">=0.19.0" + +[[package]] +name = "google-generativeai" +version = "0.8.3" +description = "Google Generative AI High level API client library and tools." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "google_generativeai-0.8.3-py3-none-any.whl", hash = "sha256:1108ff89d5b8e59f51e63d1a8bf84701cd84656e17ca28d73aeed745e736d9b7"}, +] + +[package.dependencies] +google-ai-generativelanguage = "0.6.10" +google-api-core = "*" +google-api-python-client = "*" +google-auth = ">=2.15.0" +protobuf = "*" +pydantic = "*" +tqdm = "*" +typing-extensions = "*" + +[package.extras] +dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "pyyaml"] + +[[package]] +name = "googleapis-common-protos" +version = "1.66.0" +description = "Common protobufs used in Google APIs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"}, + {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"}, +] + +[package.dependencies] +protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" + +[package.extras] +grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] + [[package]] name = "graphviz" version = "0.20.3" @@ -1624,6 +1822,22 @@ files = [ [package.extras] protobuf = ["grpcio-tools (>=1.63.2)"] +[[package]] +name = "grpcio-status" +version = "1.62.3" +description = "Status proto mapping for gRPC" +optional = false +python-versions = ">=3.6" +files = [ + {file = "grpcio-status-1.62.3.tar.gz", hash = "sha256:289bdd7b2459794a12cf95dc0cb727bd4a1742c37bd823f760236c937e53a485"}, + {file = "grpcio_status-1.62.3-py3-none-any.whl", hash = "sha256:f9049b762ba8de6b1086789d8315846e094edac2c50beaf462338b301a8fd4b8"}, +] + +[package.dependencies] +googleapis-common-protos = ">=1.5.5" +grpcio = ">=1.62.3" +protobuf = ">=4.21.6" + [[package]] name = "h11" version = "0.14.0" @@ -1656,6 +1870,20 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] trio = ["trio (>=0.22.0,<0.26.0)"] +[[package]] +name = "httplib2" +version = "0.22.0" +description = "A comprehensive HTTP client library." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, + {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, +] + +[package.dependencies] +pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} + [[package]] name = "httpx" version = "0.27.0" @@ -2482,8 +2710,8 @@ langchain-core = ">=0.2.38,<0.3.0" langchain-text-splitters = ">=0.2.0,<0.3.0" langsmith = ">=0.1.17,<0.2.0" numpy = [ - {version = ">=1,<2", markers = "python_version < \"3.12\""}, {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, + {version = ">=1,<2", markers = "python_version < \"3.12\""}, ] pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -2509,8 +2737,8 @@ langchain = ">=0.2.16,<0.3.0" langchain-core = ">=0.2.38,<0.3.0" langsmith = ">=0.1.0,<0.2.0" numpy = [ - {version = ">=1,<2", markers = "python_version < \"3.12\""}, {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, + {version = ">=1,<2", markers = "python_version < \"3.12\""}, ] PyYAML = ">=5.3" requests = ">=2,<3" @@ -2533,8 +2761,8 @@ jsonpatch = ">=1.33,<2.0" langsmith = ">=0.1.75,<0.2.0" packaging = ">=23.2,<25" pydantic = [ - {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, + {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, ] PyYAML = ">=5.3" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" @@ -2585,8 +2813,8 @@ files = [ httpx = ">=0.23.0,<1" orjson = ">=3.9.14,<4.0.0" pydantic = [ - {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, + {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, ] requests = ">=2,<3" @@ -3528,8 +3756,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3600,6 +3828,17 @@ files = [ qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["docopt", "pytest"] +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -3786,6 +4025,23 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "proto-plus" +version = "1.25.0" +description = "Beautiful, Pythonic protocol buffers." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"}, + {file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"}, +] + +[package.dependencies] +protobuf = ">=3.19.0,<6.0.0dev" + +[package.extras] +testing = ["google-api-core (>=1.31.5)"] + [[package]] name = "protobuf" version = "4.25.4" @@ -3922,6 +4178,31 @@ files = [ {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"}, ] +[[package]] +name = "pyasn1" +version = "0.6.1" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, + {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.1" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, + {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.7.0" + [[package]] name = "pycparser" version = "2.22" @@ -3948,8 +4229,8 @@ files = [ annotated-types = ">=0.4.0" pydantic-core = "2.20.1" typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, + {version = ">=4.6.1", markers = "python_version < \"3.13\""}, ] [package.extras] @@ -4743,6 +5024,47 @@ files = [ {file = "rpds_py-0.20.0.tar.gz", hash = "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"}, ] +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "ruff" +version = "0.8.0" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.8.0-py3-none-linux_armv6l.whl", hash = "sha256:fcb1bf2cc6706adae9d79c8d86478677e3bbd4ced796ccad106fd4776d395fea"}, + {file = "ruff-0.8.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:295bb4c02d58ff2ef4378a1870c20af30723013f441c9d1637a008baaf928c8b"}, + {file = "ruff-0.8.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7b1f1c76b47c18fa92ee78b60d2d20d7e866c55ee603e7d19c1e991fad933a9a"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb0d4f250a7711b67ad513fde67e8870109e5ce590a801c3722580fe98c33a99"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e55cce9aa93c5d0d4e3937e47b169035c7e91c8655b0974e61bb79cf398d49c"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f4cd64916d8e732ce6b87f3f5296a8942d285bbbc161acee7fe561134af64f9"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c5c1466be2a2ebdf7c5450dd5d980cc87c8ba6976fb82582fea18823da6fa362"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2dabfd05b96b7b8f2da00d53c514eea842bff83e41e1cceb08ae1966254a51df"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:facebdfe5a5af6b1588a1d26d170635ead6892d0e314477e80256ef4a8470cf3"}, + {file = "ruff-0.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87a8e86bae0dbd749c815211ca11e3a7bd559b9710746c559ed63106d382bd9c"}, + {file = "ruff-0.8.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:85e654f0ded7befe2d61eeaf3d3b1e4ef3894469cd664ffa85006c7720f1e4a2"}, + {file = "ruff-0.8.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:83a55679c4cb449fa527b8497cadf54f076603cc36779b2170b24f704171ce70"}, + {file = "ruff-0.8.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:812e2052121634cf13cd6fddf0c1871d0ead1aad40a1a258753c04c18bb71bbd"}, + {file = "ruff-0.8.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:780d5d8523c04202184405e60c98d7595bdb498c3c6abba3b6d4cdf2ca2af426"}, + {file = "ruff-0.8.0-py3-none-win32.whl", hash = "sha256:5fdb6efecc3eb60bba5819679466471fd7d13c53487df7248d6e27146e985468"}, + {file = "ruff-0.8.0-py3-none-win_amd64.whl", hash = "sha256:582891c57b96228d146725975fbb942e1f30a0c4ba19722e692ca3eb25cc9b4f"}, + {file = "ruff-0.8.0-py3-none-win_arm64.whl", hash = "sha256:ba93e6294e9a737cd726b74b09a6972e36bb511f9a102f1d9a7e1ce94dd206a6"}, + {file = "ruff-0.8.0.tar.gz", hash = "sha256:a7ccfe6331bf8c8dad715753e157457faf7351c2b69f62f32c165c2dbcbacd44"}, +] + [[package]] name = "s3transfer" version = "0.10.2" @@ -5788,6 +6110,17 @@ files = [ [package.extras] dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake8-commas", "flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-modern-annotations", "flake8-noqa", "flake8-pyproject", "flake8-requirements", "flake8-typechecking-import", "flake8-use-fstring", "mypy", "pep8-naming", "types-PyYAML"] +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +optional = false +python-versions = ">=3.6" +files = [ + {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = 
"sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] + [[package]] name = "urllib3" version = "2.2.2" @@ -6128,5 +6461,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = ">=3.11, <4.0 -content-hash = "4d44108f296caafc4f938300bcd09141d2bed45c88bfbed06081be67f01ae868" +python-versions = ">=3.11, <4.0" +content-hash = "df5b3eaad85fc2f943506d095b2e3f7094982d55d461f40a7be13d9bb742fc6f" diff --git a/pyproject.toml b/pyproject.toml index a3b7cd42..c064d819 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,10 +17,7 @@ packages = [ [tool.poetry.dependencies] python = ">=3.11, <4.0" adalflow = { path = "adalflow", develop = true } -# torch = "^2.3.1" openai = "^1.34.0" -# lightrag = {path = "lightrag/dist/lightrag-0.0.0a11-py3-none-any.whl"} -# lightrag = "^0.0.0a13" [tool.poetry.group.dev.dependencies] @@ -51,8 +48,25 @@ faiss-cpu = "^1.8.0.post1" nltk = "^3.9.1" ragas = "^0.1.16" colorama = "^0.4.6" +black = "^24.10.0" +ruff = "^0.8.0" +google-generativeai = "^0.8.3" [build-system] requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" \ No newline at end of file +build-backend = "poetry.core.masonry.api" + + +# for formatting and linting +[tool.black] +line-length = 88 +target-version = ["py311"] + +[tool.ruff] +lint.extend-ignore = [ + "E402", # Ignore module-level import issues + "E731", + "UP007", # Wants | over Union, which breaks 3.8 +] +line-length = 88 diff --git a/tutorials/adalflow_modelclient_sync_and_async.py b/tutorials/adalflow_modelclient_sync_and_async.py new file mode 100644 index 00000000..555d0311 --- /dev/null +++ b/tutorials/adalflow_modelclient_sync_and_async.py @@ -0,0 +1,111 @@ +import asyncio +import time +from adalflow.components.model_client import ( + OpenAIClient, +) # Assuming OpenAIClient with .call() and .acall() is available +from adalflow.core.types import ModelType + +from getpass import getpass +import os + +from adalflow.utils import setup_env + +# Load environment variables - Make sure to have OPENAI_API_KEY in .env file and .env is present in current folder +if os.path.isfile(".env"): + setup_env(".env") + +# Prompt user to enter their API keys securely +if "OPENAI_API_KEY" not in os.environ: + openai_api_key = getpass("Please enter your OpenAI API key: ") + # Set environment variables + os.environ["OPENAI_API_KEY"] = openai_api_key + print("API keys have been set.") + + +# Synchronous function for benchmarking .call() +def benchmark_sync_call(api_kwargs, runs=10): + """ + Benchmark the synchronous .call() method by running it multiple times. 
+ +    Parameters: +    - api_kwargs: The arguments to be passed to the API call +    - runs: The number of times to run the call (default is 10) +    """ +    # Record the start time of the benchmark +    start_time = time.time() + +    # Perform synchronous API calls for the specified number of runs +    responses = [ +        openai_client.call( +            api_kwargs=api_kwargs,  # API arguments +            model_type=ModelType.LLM,  # Model type (e.g., LLM for language models) +        ) +        for _ in range(runs)  # Repeat 'runs' times +    ] + +    # Record the end time after all calls are completed +    end_time = time.time() + +    # Output the results of each synchronous call +    for i, response in enumerate(responses): +        print(f"Sync call {i + 1} completed: {response}") + +    # Print the total time taken for all synchronous calls +    print(f"\nSynchronous benchmark completed in {end_time - start_time:.2f} seconds") + + +# Asynchronous function for benchmarking .acall() +async def benchmark_async_acall(api_kwargs, runs=10): +    """ +    Benchmark the asynchronous .acall() method by running it multiple times concurrently. + +    Parameters: +    - api_kwargs: The arguments to be passed to the API call +    - runs: The number of times to run the asynchronous call (default is 10) +    """ +    # Record the start time of the benchmark +    start_time = time.time() + +    # Create a list of asynchronous tasks for the specified number of runs +    tasks = [ +        openai_client.acall( +            api_kwargs=api_kwargs,  # API arguments +            model_type=ModelType.LLM,  # Model type (e.g., LLM for language models) +        ) +        for _ in range(runs)  # Repeat 'runs' times +    ] + +    # Execute all tasks concurrently and wait for them to finish +    responses = await asyncio.gather(*tasks) + +    # Record the end time after all tasks are completed +    end_time = time.time() + +    # Output the results of each asynchronous call +    for i, response in enumerate(responses): +        print(f"Async call {i + 1} completed: {response}") + +    # Print the total time taken for all asynchronous calls +    print(f"\nAsynchronous benchmark completed in {end_time - start_time:.2f} seconds") + + +if __name__ == "__main__": +    # Initialize the OpenAI client +    openai_client = OpenAIClient() + +    # Sample prompt for testing +    prompt = "Tell me a joke." 
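+ +    # convert_inputs_to_api_kwargs maps the raw prompt and model_kwargs onto the +    # provider request payload (model, messages, etc.) expected by the OpenAI API, +    # so both benchmarks can reuse the same api_kwargs.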
+ + model_kwargs = {"model": "gpt-3.5-turbo", "temperature": 0.5, "max_tokens": 100} + api_kwargs = openai_client.convert_inputs_to_api_kwargs( + input=prompt, model_kwargs=model_kwargs, model_type=ModelType.LLM + ) + # Run both benchmarks + print("Starting synchronous benchmark...\n") + benchmark_sync_call(api_kwargs) + + print("\nStarting asynchronous benchmark...\n") + asyncio.run(benchmark_async_acall(api_kwargs)) diff --git a/tutorials/component.ipynb b/tutorials/component.ipynb deleted file mode 100644 index 17e371a4..00000000 --- a/tutorials/component.ipynb +++ /dev/null @@ -1,711 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "from adalflow.core import Component, Generator\n", - "from adalflow.components.model_client import OpenAIClient\n", - "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's turn on the library log to help with debugging." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from adalflow.utils import get_logger\n", - "get_logger()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#Toy example\n", - "\n", - "class DocQA(Component):\n", - " def __init__(self):\n", - " super(DocQA, self).__init__()\n", - " self.doc = Generator(\n", - " template=template_doc,\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", - " )\n", - "\n", - " def call(self, query: str) -> str:\n", - " return self.doc(prompt_kwargs={\"input_str\": query}).data\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'type': 'DocQA', 'data': {'_components': {'_ordered_dict': True, 'data': [('doc', {'type': 'Generator', 'data': {'_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': [], 'preset_prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, '_api_key': None}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'template': ' You are a doctor User: {{input_str}}', 'preset_prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, '_trainable_params': []}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False}}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'_components': OrderedDict([('doc',\n", - " Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, \n", - " 
(prompt): Prompt(template: You are a doctor User: {{input_str}})\n", - " (model_client): OpenAIClient()\n", - " ))]),\n", - " '_parameters': OrderedDict(),\n", - " 'training': False}" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# states\n", - "states = doc.to_dict()\n", - "print(states)\n", - "doc.__dict__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'_components': OrderedDict([('doc',\n", - " Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, \n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}})\n", - " (model_client): OpenAIClient()\n", - " ))]),\n", - " '_parameters': OrderedDict(),\n", - " 'training': False}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# restore the states\n", - "doc2 = DocQA.from_dict(states)\n", - "# print(doc2.call(\"What is the capital of France?\"))\n", - "doc2.__dict__\n", - "# doc2.to_dict()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'DocQA',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('doc',\n", - " {'type': 'Generator',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('prompt',\n", - " {'type': 'Prompt',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_variables': [],\n", - " 'preset_prompt_kwargs': {}}}),\n", - " ('model_client',\n", - " {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " '_api_key': None}})]},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'preset_prompt_kwargs': {},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'output_processors': None,\n", - " '_trainable_params': []}})]},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False}}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc2.to_dict() == doc.to_dict()\n", - "doc2.to_dict()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# pickle to states\n", - "import pickle\n", - "# from collections import OrderedDict\n", - "# from openai import OpenAI # cant pickle this\n", - "\n", - "# class DummpyDocQA():\n", - "# a = OrderedDict()\n", - "# def __init__(self):\n", - "# self.dummpy = 1\n", - "# self.client = OpenAI()\n", - "\n", - "# doc_dummy = DummpyDocQA()\n", - "with open(\"doc.pkl\", \"wb\") as f:\n", - " pickle.dump(doc.to_dict(), f)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "# save the serialized states to a file\n", - "from adalflow.utils.file_io import save_pickle, save_json\n", - "states = doc.to_dict()\n", - "# save_json(states, \"doc.json\")\n", - "save_pickle(states, \"doc.pkl\")\n", - "\n", - "# load the serialized states from a file\n", - "from 
adalflow.utils.file_io import load_pickle, load_json\n", - "states_loaded = load_pickle(\"doc.pkl\")\n", - "# states_loaded = load_json(\"doc.json\")\n", - "\n", - "states_loaded == states\n", - "\n", - "doc3 = DocQA.from_dict(states_loaded)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-06-14 17:42:48 - INFO - [generator.py:199:call] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n", - "2024-06-14 17:42:48 - INFO - [generator.py:200:call] - model_kwargs: {}\n", - "2024-06-14 17:42:48 - WARNING - [prompt_builder.py:120:compose_prompt_kwargs] - Key input_str does not exist in the prompt_kwargs.\n", - "2024-06-14 17:42:48 - INFO - [openai_client.py:139:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the capital of France?'}]}\n", - "2024-06-14 17:42:48 - INFO - [_client.py:1026:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-06-14 17:42:48 - INFO - [generator.py:208:call] - output: GeneratorOutput(data='The capital of France is Paris.', error=None, usage=None, raw_response='The capital of France is Paris.')\n" - ] - }, - { - "data": { - "text/plain": [ - "'The capital of France is Paris.'" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc3\n", - "doc3.call(\"What is the capital of France?\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-06-14 17:12:51 - INFO - [generator.py:199:call] - prompt_kwargs: {'input_str': 'What is the best treatment for headache?'}\n", - "2024-06-14 17:12:51 - INFO - [generator.py:200:call] - model_kwargs: {}\n", - "2024-06-14 17:12:51 - INFO - [openai_client.py:140:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", - "2024-06-14 17:12:54 - INFO - [_client.py:1026:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-06-14 17:12:54 - INFO - [generator.py:208:call] - output: GeneratorOutput(data='As a doctor, the best treatment for a headache depends on the cause of the headache. In general, some common treatments for headaches include:\\n\\n1. Over-the-counter pain relievers such as acetaminophen, ibuprofen, or aspirin\\n2. Rest and relaxation in a quiet, dark room\\n3. Hydration\\n4. Applying a cold or warm compress to the forehead or neck\\n5. Avoiding triggers such as stress, lack of sleep, or certain foods\\n6. Practicing relaxation techniques such as deep breathing, meditation, or yoga\\n7. Prescription medications for chronic or severe headaches\\n\\nIt is important to consult with a healthcare provider for a proper diagnosis and treatment plan for your specific type of headache.', error=None, usage=None, raw_response='As a doctor, the best treatment for a headache depends on the cause of the headache. In general, some common treatments for headaches include:\\n\\n1. Over-the-counter pain relievers such as acetaminophen, ibuprofen, or aspirin\\n2. Rest and relaxation in a quiet, dark room\\n3. Hydration\\n4. Applying a cold or warm compress to the forehead or neck\\n5. 
Avoiding triggers such as stress, lack of sleep, or certain foods\\n6. Practicing relaxation techniques such as deep breathing, meditation, or yoga\\n7. Prescription medications for chronic or severe headaches\\n\\nIt is important to consult with a healthcare provider for a proper diagnosis and treatment plan for your specific type of headache.')\n", - "As a doctor, the best treatment for a headache depends on the cause of the headache. In general, some common treatments for headaches include:\n", - "\n", - "1. Over-the-counter pain relievers such as acetaminophen, ibuprofen, or aspirin\n", - "2. Rest and relaxation in a quiet, dark room\n", - "3. Hydration\n", - "4. Applying a cold or warm compress to the forehead or neck\n", - "5. Avoiding triggers such as stress, lack of sleep, or certain foods\n", - "6. Practicing relaxation techniques such as deep breathing, meditation, or yoga\n", - "7. Prescription medications for chronic or severe headaches\n", - "\n", - "It is important to consult with a healthcare provider for a proper diagnosis and treatment plan for your specific type of headache.\n" - ] - } - ], - "source": [ - "print(doc(\"What is the best treatment for headache?\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-06-14 17:12:54 - INFO - [generator.py:199:call] - prompt_kwargs: {'input_str': 'What is the best treatment for headache?'}\n", - "2024-06-14 17:12:54 - INFO - [generator.py:200:call] - model_kwargs: {}\n", - "2024-06-14 17:12:54 - INFO - [openai_client.py:140:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", - "2024-06-14 17:12:56 - INFO - [_client.py:1026:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-06-14 17:12:56 - INFO - [generator.py:208:call] - output: GeneratorOutput(data='As a doctor, the best treatment for a headache can depend on the cause of the headache. If the headache is mild and infrequent, over-the-counter pain relievers such as ibuprofen or acetaminophen can help. Additionally, getting enough rest, staying hydrated, and practicing relaxation techniques such as deep breathing exercises or meditation can also provide relief. If the headache is severe, persistent, or accompanied by other concerning symptoms, it is important to consult a healthcare professional for a proper diagnosis and individualized treatment plan.', error=None, usage=None, raw_response='As a doctor, the best treatment for a headache can depend on the cause of the headache. If the headache is mild and infrequent, over-the-counter pain relievers such as ibuprofen or acetaminophen can help. Additionally, getting enough rest, staying hydrated, and practicing relaxation techniques such as deep breathing exercises or meditation can also provide relief. If the headache is severe, persistent, or accompanied by other concerning symptoms, it is important to consult a healthcare professional for a proper diagnosis and individualized treatment plan.')\n", - "As a doctor, the best treatment for a headache can depend on the cause of the headache. If the headache is mild and infrequent, over-the-counter pain relievers such as ibuprofen or acetaminophen can help. 
Additionally, getting enough rest, staying hydrated, and practicing relaxation techniques such as deep breathing exercises or meditation can also provide relief. If the headache is severe, persistent, or accompanied by other concerning symptoms, it is important to consult a healthcare professional for a proper diagnosis and individualized treatment plan.\n" - ] - } - ], - "source": [ - "print(doc2(\"What is the best treatment for headache?\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('', DocQA(\n", - " (doc): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, \n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " )\n", - "))\n", - "('doc', Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, \n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - "))\n", - "('doc.prompt', Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str']))\n", - "('doc.model_client', OpenAIClient())\n" - ] - } - ], - "source": [ - "# list other subcomponents\n", - "\n", - "for subcomponent in doc.named_components():\n", - " print(subcomponent)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's add a parameter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from adalflow.core.parameter import Parameter\n", - "\n", - "doc.register_parameter(\"demo\", param=Parameter(data=\"demo\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('demo', Parameter: demo)\n" - ] - } - ], - "source": [ - "# list all parameters\n", - "for param in doc.named_parameters():\n", - " print(param)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'DocQA',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('doc',\n", - " {'type': 'Generator',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('prompt',\n", - " {'type': 'Prompt',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " '_template_string': ' You are a doctor User: {{input_str}}',\n", - " 'template':