Skip to content

Instantly share code, notes, and snippets.

@sxlijin
Last active December 14, 2025 22:39
Show Gist options
  • Select an option

  • Save sxlijin/867b812ceb1aa97872937bebe5cfb4be to your computer and use it in GitHub Desktop.

Select an option

Save sxlijin/867b812ceb1aa97872937bebe5cfb4be to your computer and use it in GitHub Desktop.
Structured outputs create false confidence. Companion to https://boundaryml.com/blog/structured-outputs-create-false-confidence
#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.10"
# dependencies = ["openai", "pydantic", "rich"]
# ///
"""
Demo comparing structured outputs with free-form JSON output for receipt extraction.

If you have uv, you can run this script by saving it as structured_outputs_quality_demo.py and then running:

    chmod u+x structured_outputs_quality_demo.py
    ./structured_outputs_quality_demo.py

This script is a companion to https://boundaryml.com/blog/structured-outputs-create-false-confidence
"""
import json
import re
from openai import OpenAI
from pydantic import BaseModel, Field
from rich.console import Console
from rich.pretty import Pretty
class Item(BaseModel):
    # One line item on a receipt: its name, per-unit price and quantity.
    #
    # NOTE: documented with comments rather than a class docstring on purpose.
    # pydantic copies a model's docstring into the generated JSON schema, and
    # this model is nested inside Receipt, which is handed to the LLM as the
    # response schema -- a docstring here would change what the model sees.
    name: str
    # `...` marks the field as required (same as giving no default).
    price: float = Field(..., description="per-unit item price")
    # Falls back to 1 when the quantity is not supplied.
    quantity: float = Field(1, description="If not specified, assume 1")
class Receipt(BaseModel):
    # Schema for a whole receipt; passed as `response_format` by
    # run_receipt_extraction_structured.
    #
    # NOTE: documented with comments rather than a class docstring on purpose.
    # pydantic copies a model's docstring into the generated JSON schema, so a
    # docstring here would change the schema sent to the LLM.
    #
    # Every field below is required (`...` is pydantic's "no default" marker).
    establishment_name: str
    date: str = Field(..., description="YYYY-MM-DD")
    total: float = Field(..., description="The total amount of the receipt")
    currency: str = Field(..., description="The currency used for everything on the receipt")
    items: list[Item] = Field(..., description="The items on the receipt")
# Module-level OpenAI client shared by both extraction functions.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment -- confirm before running.
client = OpenAI()
# Module-level rich console that main() uses for its colored output.
console = Console()
def run_receipt_extraction_structured(image_url: str):
    """Extract receipt data from an image using structured outputs.

    Sends a system instruction, a short text prompt and the image at
    ``image_url`` to the model, passing the ``Receipt`` model as
    ``response_format``.

    Returns:
        A ``(content, parsed)`` tuple taken from the first choice of the
        response: the message's raw ``content`` and its ``parsed`` value.
    """
    system_message = {
        "role": "system",
        "content": "You are a precise receipt extraction engine. Return only structured data matching the Receipt schema.",
    }
    user_message = {
        "role": "user",
        "content": [
            # The prompt text keeps its leading and trailing newline.
            {"type": "text", "text": "\nExtract data from the receipt.\n"},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }

    completion = client.beta.chat.completions.parse(
        model="gpt-5.2-2025-12-11",
        messages=[system_message, user_message],
        response_format=Receipt,
    )

    message = completion.choices[0].message
    return message.content, message.parsed
# Prompt for the free-form run: ask for reasoning first, then a JSON answer.
# The schema lines are deliberately flush-left so the text sent to the model
# stays exactly as it was.
_FREEFORM_PROMPT = """
Extract data from the receipt.
Explain your reasoning, then answer in JSON:
{
establishment_name: string,
// YYYY-MM-DD
date: string,
// The total amount of the receipt
total: float,
// The currency used for everything on the receipt
currency: string,
// The items on the receipt
items: [
{
name: string,
// per-unit item price
price: float,
// If not specified, assume 1
quantity: float,
}
],
}
"""


def _parse_fenced_json(text):
    """Decode the first ```json fenced block in ``text``; return None if there is none or it is invalid."""
    if not text:
        # The message content can be missing entirely; nothing to search.
        return None
    fence = re.search(r"```json(.*?)```", text, flags=re.DOTALL)
    if fence is None:
        # The model answered in prose only (e.g. the image is not a receipt).
        return None
    try:
        return json.loads(fence.group(1))
    except json.JSONDecodeError:
        # The reply is model-written text, so malformed JSON is an expected
        # outcome rather than a programming error; the caller still gets the
        # raw content to inspect.
        return None


def run_receipt_extraction_freeform(image_url: str):
    """Extract receipt data from an image by parsing a free-form reply.

    Sends the image at ``image_url`` with a prompt asking the model to explain
    its reasoning and then answer in JSON. No ``response_format`` is used, so
    the model is free to reply with prose (warnings, refusals) around or
    instead of the JSON.

    Returns:
        A ``(content, parsed)`` tuple: ``content`` is the raw message content
        of the first choice; ``parsed`` is the decoded contents of the first
        ```json fenced block in it, or ``None`` when the reply has no such
        block or the block is not valid JSON.
    """
    # Plain chat completion: the structured-output `parse` helper adds nothing
    # when no response_format is supplied.
    completion = client.chat.completions.create(
        model="gpt-5.2-2025-12-11",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": _FREEFORM_PROMPT},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            },
        ],
    )
    content = completion.choices[0].message.content
    return content, _parse_fenced_json(content)
def main() -> None:
    """Run both extraction strategies on each sample receipt and print the replies.

    For every sample image this prints a header (title, URL and what to
    expect), then the raw reply from the structured-output run, then the raw
    reply from the free-form run.
    """
    images = [
        {
            "title": "Parsing receipt: fractional quantity",
            "url": "https://boundaryml.com/receipt-fractional-quantity.jpg",
            "expected": "You should expect quantity to be 0.46.",
        },
        {
            "title": "Parsing receipt: elephant",
            "url": "https://boundaryml.com/receipt-elephant.jpg",
            "expected": "You should expect an error.",
        },
        {
            "title": "Parsing receipt: currency exchange",
            "url": "https://boundaryml.com/receipt-currency-exchange.jpg",
            "expected": "You should expect a warning about mixed currencies.",
        },
    ]
    print("This is a demonstration of how structured outputs create false confidence.")
    for entry in images:
        url = entry["url"]

        # Print the header before any request is made so the user sees which
        # case is running while the model calls are in flight.
        console.print("[cyan]--------------------------------[/cyan]")
        console.print(f"[cyan]{entry['title']}[/cyan]")
        console.print(f"Asking LLM to parse receipt from {url}")
        console.print(entry["expected"])
        console.print()

        structured_content, _ = run_receipt_extraction_structured(url)
        console.print("[cyan]Using structured outputs:[/cyan]")
        # Model output is arbitrary text: print it verbatim so bracketed text
        # is not interpreted as Rich markup and ":name:" is not turned into an
        # emoji.
        console.print(structured_content, markup=False, emoji=False)
        console.print()

        freeform_content, _ = run_receipt_extraction_freeform(url)
        console.print("[cyan]Parsing free-form output:[/cyan]")
        console.print(freeform_content, markup=False, emoji=False)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment