Skip to content

Instantly share code, notes, and snippets.

@tobiasraabe
Last active August 15, 2025 11:25
Show Gist options
  • Select an option

  • Save tobiasraabe/7eb55a6ad63f5f93b0faf20792733f9c to your computer and use it in GitHub Desktop.

Select an option

Save tobiasraabe/7eb55a6ad63f5f93b0faf20792733f9c to your computer and use it in GitHub Desktop.
pydantic-ai agent with LLM call in tool and inspect-ai
"""Example of using pydantic-ai and inspect-ai together.

Note: this example is untested.

.. code-block:: console

    # Run the script
    $ uv run --script agent.py

By default the script runs the agent. To run the inspect-ai evaluations
instead, edit the ``if __name__ == "__main__"`` block at the bottom of the file.
"""
# /// script
# dependencies = [
# "pydantic-ai",
# "openai",
# "python-dotenv",
# "inspect-ai",
# "nest-asyncio",
# ]
# ///
import os
import random
from collections.abc import Awaitable
from collections.abc import Callable
from typing import Any
import nest_asyncio
from dotenv import load_dotenv
from inspect_ai import Task
from inspect_ai import task
from inspect_ai.agent import bridge
from inspect_ai.dataset import Sample
from pydantic_ai import Agent
# Patch asyncio so that an already-running event loop can be re-entered.
# NOTE(review): presumably needed because ``run_sync`` is invoked while an
# event loop is already running (e.g. ``get_player_name`` is a tool that the
# outer agent calls in the middle of its own run) -- confirm.
nest_asyncio.apply()
# Load environment variables before the agents below are constructed.
# NOTE(review): presumably this is where the API key and the optional
# ``EVALUATING`` flag read by ``get_model_name`` come from -- confirm.
load_dotenv()
def get_model_name(model_name: str) -> str:
    """Return the model identifier to hand to an ``Agent``.

    When the ``EVALUATING`` environment variable is set to a non-empty value,
    the name is prefixed with ``inspect/openai/`` so that inspect-ai can trace
    the model calls. Otherwise the name is returned unchanged.
    """
    if not os.getenv("EVALUATING"):
        return model_name
    # Evaluation mode: route the model through inspect-ai.
    return f"inspect/openai/{model_name}"
### Start of the pydantic-ai example ###
# Instructions for the main agent, which plays the dice game with the user.
_DICE_GAME_PROMPT = (
    "You're a dice game, you should roll the die and see if the number "
    "you get back matches the user's guess. If so, tell them they're a winner. "
    "Use the player's name in the response."
)
# Main agent; the ``roll_dice`` and ``get_player_name`` tools are registered
# on it via ``@agent.tool_plain``.
agent = Agent(
    system_prompt=_DICE_GAME_PROMPT,
    model=get_model_name("gpt-4o"),
)
# Helper agent whose only job is to make up a player name; it is called from
# inside the ``get_player_name`` tool of the main agent.
player_name_agent = Agent(
    system_prompt="Invent a name for a player. Return only the name, no other text.",
    model=get_model_name("gpt-4o"),
)
# NOTE(review): the docstring below is presumably forwarded to the model as
# the tool description, so it is part of runtime behaviour -- keep it verbatim.
@agent.tool_plain
def roll_dice() -> str:
    """Roll a six-sided die and return the result."""
    pips = random.randint(1, 6)  # noqa: S311
    return str(pips)
# NOTE(review): the docstring below is presumably forwarded to the model as
# the tool description, so it is part of runtime behaviour -- keep it verbatim.
@agent.tool_plain
def get_player_name() -> str:
    """Get the player's name."""
    # Nested LLM call: the helper agent is run without a user prompt and
    # relies solely on its system prompt to produce a name.
    name_result = player_name_agent.run_sync()
    return name_result.output
def main() -> None:
    """Play one round: send a fixed guess to the dice agent and print its reply."""
    print(agent.run_sync("My guess is 4").output)  # noqa: T201
### End of the pydantic-ai example ###
### Start of the inspect-ai example ###
def my_agent() -> Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]:
    """Bridge the agent to the inspect-ai framework.

    Returns:
        An async callable that takes a sample dict, forwards the content of
        its first input message to the dice agent, and returns the agent's
        reply under the ``"output"`` key.
    """

    async def run(sample: dict[str, Any]) -> dict[str, Any]:
        # This coroutine already executes inside a running event loop, so use
        # the agent's async API instead of the blocking ``run_sync`` wrapper.
        # That avoids re-entering the loop and does not depend on the
        # ``nest_asyncio`` patch.
        user_prompt = sample["input"][0]["content"]
        dice_result = await agent.run(user_prompt)
        return {"output": dice_result.output}

    return run
@task
def task_agent() -> Task:
    """Build the inspect-ai task that runs the dice agent on one sample."""
    samples = [Sample(input="My guess is 4")]
    # Wrap the pydantic-ai agent so inspect-ai can use it as a solver.
    bridged_solver = bridge(my_agent())
    return Task(dataset=samples, solver=bridged_solver)
### End of the inspect-ai example ###
if __name__ == "__main__":
    # Default: play a single round of the dice game.
    main()
    # To run the inspect-ai evaluation instead, replace the call above with
    # the two commented lines below. ``get_model_name`` reads ``EVALUATING``
    # while the agents are constructed at import time, so that variable must
    # already be set when the script starts.
    # from inspect_ai import eval
    # eval(task_agent())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment