tobiasraabe · August 15, 2025 11:25
diff --git a/agent.py b/agent.py
 """Example of using pydantic-ai and inspect-ai together.

 Sorry, untested!!!

 .. code-block:: console

   # Run the script
   $ uv run --script agent.py

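 By default the script runs the pydantic-ai agent once. To run the inspect-ai
 evaluation instead, edit the ``if __name__ == "__main__":`` block at the
 bottom of the file and set the ``EVALUATING`` environment variable so that
 ``get_model_name`` prefixes the model name for inspect-ai.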

 """

 # /// script
 # dependencies = [
 #   "pydantic-ai",
 #   "openai",
 #   "python-dotenv",
 #   "inspect-ai",
 #   "nest-asyncio",
 # ]
 # ///

 import os
 import random
 from collections.abc import Awaitable
 from collections.abc import Callable
 from typing import Any

 import nest_asyncio
 from dotenv import load_dotenv
 from inspect_ai import Task
 from inspect_ai import task
 from inspect_ai.agent import bridge
 from inspect_ai.dataset import Sample
 from pydantic_ai import Agent

 # Allow nested use of the asyncio event loop: the code below makes blocking
 # ``run_sync`` calls that may happen while a loop is already running.
 nest_asyncio.apply()

 # Load environment variables from a ``.env`` file via python-dotenv
 # (presumably the API credentials for the model provider -- confirm locally).
 load_dotenv()


 def get_model_name(model_name: str) -> str:
    """Return the model identifier to hand to pydantic-ai.

    When the ``EVALUATING`` environment variable is set to a non-empty value,
    the name is prefixed with ``inspect/openai/`` so that inspect-ai can trace
    the model calls; otherwise it is returned unchanged.
    """
    evaluating = bool(os.getenv("EVALUATING"))
    return f"inspect/openai/{model_name}" if evaluating else model_name


 ### Start of the pydantic-ai example ###

 # Main agent: plays the dice game. Its tools (``roll_dice`` and
 # ``get_player_name``) are registered further down via ``@agent.tool_plain``.
 agent = Agent(
    model=get_model_name("gpt-4o"),
    system_prompt=(
        "You're a dice game, you should roll the die and see if the number "
        "you get back matches the user's guess. If so, tell them they're a winner. "
        "Use the player's name in the response."
    ),
 )


 # Helper agent that the ``get_player_name`` tool runs to invent a player name.
 player_name_agent = Agent(
    model=get_model_name("gpt-4o"),
    system_prompt=("Invent a name for a player. Return only the name, no other text."),
 )


 @agent.tool_plain
 def roll_dice() -> str:
    """Tool: roll one six-sided die.

    Returns:
        The face that came up (1 to 6), rendered as a string.
    """
    face = random.randint(1, 6)  # noqa: S311
    return str(face)


 @agent.tool_plain
 def get_player_name() -> str:
    """Tool: obtain a name for the player.

    Runs ``player_name_agent`` synchronously, without a user prompt, and
    returns the output of that run.
    """
    name_run = player_name_agent.run_sync()
    return name_run.output


 def main() -> None:
    """Play one round with the fixed guess ``4`` and print the agent's reply."""
    guess = "My guess is 4"
    reply = agent.run_sync(guess).output
    print(reply)  # noqa: T201


 ### End of the pydantic-ai example ###

 ### Start of the inspect-ai example ###


 def my_agent() -> Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]:
    """Build the coroutine function that is handed to inspect-ai's ``bridge``.

    Returns:
        An async callable that takes the sample dict, forwards the content of
        its first input message to ``agent`` and returns ``{"output": ...}``
        holding the agent's reply.
    """

    async def run(sample: dict[str, Any]) -> dict[str, Any]:
        # Await the async API rather than calling ``run_sync``: this code is
        # already inside a coroutine, and the blocking variant would hold up
        # the running event loop and depend on the ``nest_asyncio`` patch.
        prompt = sample["input"][0]["content"]
        dice_result = await agent.run(prompt)
        return {"output": dice_result.output}

    return run


 @task
 def task_agent() -> Task:
    """Define the inspect-ai task that evaluates the dice agent.

    The dataset holds a single sample (the guess ``4``); the solver is the
    callable from ``my_agent`` wrapped by ``bridge``.
    """
    samples = [Sample(input="My guess is 4")]
    solver = bridge(my_agent())
    return Task(dataset=samples, solver=solver)


 ### End of the inspect-ai example ###

 if __name__ == "__main__":
    # Default: run the pydantic-ai agent once and print its reply.
    main()

    # To run the inspect-ai evaluation instead, comment out ``main()`` above
    # and uncomment the two lines below.
    # from inspect_ai import eval
    # eval(task_agent())
	"""Example of using pydantic-ai and inspect-ai together.

	Sorry, untested!!!

	.. code-block:: console

	# Run the script
	$ uv run --script agent.py

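	By default the script runs the pydantic-ai agent once. To run the inspect-ai
	evaluation instead, edit the ``if __name__ == "__main__":`` block at the
	bottom of the file and set the ``EVALUATING`` environment variable so that
	``get_model_name`` prefixes the model name for inspect-ai.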

	"""

	# /// script
	# dependencies = [
	# "pydantic-ai",
	# "openai",
	# "python-dotenv",
	# "inspect-ai",
	# "nest-asyncio",
	# ]
	# ///

	import os
	import random
	from collections.abc import Awaitable
	from collections.abc import Callable
	from typing import Any

	import nest_asyncio
	from dotenv import load_dotenv
	from inspect_ai import Task
	from inspect_ai import task
	from inspect_ai.agent import bridge
	from inspect_ai.dataset import Sample
	from pydantic_ai import Agent

	# Allow nested use of the asyncio event loop: the code below makes blocking
	# ``run_sync`` calls that may happen while a loop is already running.
	nest_asyncio.apply()

	# Load environment variables from a ``.env`` file via python-dotenv
	# (presumably the API credentials for the model provider -- confirm locally).
	load_dotenv()


	def get_model_name(model_name: str) -> str:
		"""Return the model identifier to hand to pydantic-ai.

		When the ``EVALUATING`` environment variable is set to a non-empty value,
		the name is prefixed with ``inspect/openai/`` so that inspect-ai can trace
		the model calls; otherwise it is returned unchanged.
		"""
		evaluating = bool(os.getenv("EVALUATING"))
		return f"inspect/openai/{model_name}" if evaluating else model_name


	### Start of the pydantic-ai example ###

	# Main agent: plays the dice game. Its tools (``roll_dice`` and
	# ``get_player_name``) are registered further down via ``@agent.tool_plain``.
	agent = Agent(
	model=get_model_name("gpt-4o"),
	system_prompt=(
	"You're a dice game, you should roll the die and see if the number "
	"you get back matches the user's guess. If so, tell them they're a winner. "
	"Use the player's name in the response."
	),
	)


	# Helper agent that the ``get_player_name`` tool runs to invent a player name.
	player_name_agent = Agent(
	model=get_model_name("gpt-4o"),
	system_prompt=("Invent a name for a player. Return only the name, no other text."),
	)


	@agent.tool_plain
	def roll_dice() -> str:
		"""Tool: roll one six-sided die.

		Returns:
			The face that came up (1 to 6), rendered as a string.
		"""
		face = random.randint(1, 6)  # noqa: S311
		return str(face)


	@agent.tool_plain
	def get_player_name() -> str:
		"""Tool: obtain a name for the player.

		Runs ``player_name_agent`` synchronously, without a user prompt, and
		returns the output of that run.
		"""
		name_run = player_name_agent.run_sync()
		return name_run.output


	def main() -> None:
		"""Play one round with the fixed guess ``4`` and print the agent's reply."""
		guess = "My guess is 4"
		reply = agent.run_sync(guess).output
		print(reply)  # noqa: T201


	### End of the pydantic-ai example ###

	### Start of the inspect-ai example ###


	def my_agent() -> Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]:
		"""Build the coroutine function that is handed to inspect-ai's ``bridge``.

		Returns:
			An async callable that takes the sample dict, forwards the content of
			its first input message to ``agent`` and returns ``{"output": ...}``
			holding the agent's reply.
		"""

		async def run(sample: dict[str, Any]) -> dict[str, Any]:
			# Await the async API rather than calling ``run_sync``: this code is
			# already inside a coroutine, and the blocking variant would hold up
			# the running event loop and depend on the ``nest_asyncio`` patch.
			prompt = sample["input"][0]["content"]
			dice_result = await agent.run(prompt)
			return {"output": dice_result.output}

		return run


	@task
	def task_agent() -> Task:
		"""Define the inspect-ai task that evaluates the dice agent.

		The dataset holds a single sample (the guess ``4``); the solver is the
		callable from ``my_agent`` wrapped by ``bridge``.
		"""
		samples = [Sample(input="My guess is 4")]
		solver = bridge(my_agent())
		return Task(dataset=samples, solver=solver)


	### End of the inspect-ai example ###

	if __name__ == "__main__":
		# Default: run the pydantic-ai agent once and print its reply.
		main()

		# To run the inspect-ai evaluation instead, comment out ``main()`` above
		# and uncomment the two lines below.
		# from inspect_ai import eval
		# eval(task_agent())
No results found