# promptfooconfig.yaml

# Evaluation tests and red teaming configuration
# Docs: https://promptfoo.dev/docs/red-team/configuration

# Human-readable label for this test suite.
description: 'PyTutor LLM Tests'

# Prompt file(s) under test.
prompts:
  - 'prompts/pytutor-v1.txt'

# Model(s) the prompts are run against.
providers:
  - 'openai:gpt-4o-mini'

# Each case sets a single `question` variable and grades the model output
# with one `llm-rubric` assertion. Rubrics are written as folded scalars
# (`>-`), so a wrapped rubric is still one single-line string at runtime.
# Some rubrics interpolate the case's own question via '{{question}}'.
tests:
  - vars:
      question: 'How do I write a function to add 2 numbers together?'
    assert:
      - type: llm-rubric
        value: >-
          Gives a clear explanation of the question '{{question}}'
          without unnecessary elaboration
  - vars:
      question: 'What is the best way to loop through an array?'
    assert:
      - type: llm-rubric
        value: >-
          Avoids giving the direct answer to '{{question}}'
          and instead offers guidance or hints
  - vars:
      question: 'Can you explain how recursion works in programming?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages the student to reflect on their knowledge
          while answering '{{question}}'
  - vars:
      question: 'How do I define a variable in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Provides a simple and concise explanation of '{{question}}'
          without overwhelming details
  - vars:
      question: 'What is the difference between a list and a tuple in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Highlights key differences without explicitly listing all of them
  - vars:
      question: 'How do I write an if-else statement in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Gives an example format without offering a complete code solution
  - vars:
      question: 'What are the key characteristics of Object-Oriented Programming?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages thinking about core concepts
          such as encapsulation and inheritance
  - vars:
      question: 'How do I import a module in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Provides a nudge in the right direction
          without giving the syntax directly
  - vars:
      question: 'Can you explain what a loop is in programming?'
    assert:
      - type: llm-rubric
        value: >-
          Guides the student to think of practical examples
          without spelling them out
  - vars:
      question: 'What is a dictionary in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Explains its basic purpose and gives hints on usage
          without revealing too much
  - vars:
      question: 'How do I write a for loop to iterate through a list?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages a step-by-step thought process
          rather than directly solving '{{question}}'
  - vars:
      question: 'What is the purpose of the return statement in a function?'
    assert:
      - type: llm-rubric
        value: >-
          Clarifies the concept of returning values in functions
          without giving too specific examples
  - vars:
      question: 'Can you explain what encapsulation is in OOP?'
    assert:
      - type: llm-rubric
        value: >-
          Provides an analogy or high-level concept explanation
          instead of code snippets
  - vars:
      question: 'What is a class in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages the student to relate classes to real-world objects
          while explaining '{{question}}'
  - vars:
      question: 'How do I write a function with multiple arguments?'
    assert:
      - type: llm-rubric
        value: >-
          Suggests exploring syntax and function structure
          without providing an exact example
  - vars:
      question: 'What is polymorphism in Object-Oriented Programming?'
    assert:
      - type: llm-rubric
        value: >-
          Offers a broad explanation of the concept
          with minimal technical jargon
  - vars:
      question: 'How can I create a list in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages the student to explore data structures
          without giving away the syntax
  - vars:
      question: 'What is inheritance in Object-Oriented Programming?'
    assert:
      - type: llm-rubric
        value: >-
          Points to key ideas such as parent-child relationships in classes
  - vars:
      question: 'How do I write a while loop in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Hints at the control flow structure
          without directly offering the code
  - vars:
      question: 'What is a method in a Python class?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages a connection between functions and methods
          within the class context
  - vars:
      question: 'How do I create an instance of a class?'
    assert:
      - type: llm-rubric
        value: >-
          Suggests exploring constructors
          without giving the complete code solution
  - vars:
      question: 'Can you explain what exception handling is in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Provides the purpose of exception handling
          without showing specific syntax
  - vars:
      question: 'What is a constructor in Object-Oriented Programming?'
    assert:
      - type: llm-rubric
        value: >-
          Encourages exploring how classes initialize objects
          while explaining '{{question}}'
  - vars:
      question: 'How do I write a lambda function in Python?'
    assert:
      - type: llm-rubric
        value: >-
          Prompts the student to think of anonymous functions
          without directly providing the syntax

redteam:
  # Default number of adversarial inputs to generate for each plugin.
  # Approximate total:
  #   numTests * (number of plugins after collections are expanded)
  #            * (1 + strategies.length)
  # Collection ids (`harmful`, `pii`, `default`) stand for several plugins
  # each, so count their members rather than the entries listed below.
  numTests: 5

  # To control the number of tests for a single plugin, use the object form:
  #   - id: plugin-name
  #     numTests: 10
  plugins:
    # Collections. `harmful` and `pii` already cover every `harmful:*` and
    # `pii:*` category, so the individual categories are intentionally not
    # repeated here. To test only a subset, replace the collection with the
    # specific ids you want (for example `harmful:hate` or `pii:direct`).
    - harmful  # All harmful categories
    - pii  # All PII categories
    - default  # Includes common plugins

    # Individual plugins.
    - contracts  # Enters business or legal commitments without supervision
    - excessive-agency  # Model taking excessive initiative or misunderstanding its capabilities
    - hallucination  # Model generating false or misleading information
    - hijacking  # Unauthorized or off-topic resource use
    - overreliance  # Model susceptible to relying on an incorrect user assumption or input
    - politics  # Makes political statements

  # Attack methods for applying adversarial inputs
  strategies:
    - jailbreak  # Attempts to bypass security measures through iterative prompt refinement
    - prompt-injection  # Malicious inputs designed to manipulate the model's behavior