Skip to content

Instantly share code, notes, and snippets.

@kwindla
Created August 8, 2025 17:19
Show Gist options
  • Select an option

  • Save kwindla/874a880e444073aa3c1ad84801d8842e to your computer and use it in GitHub Desktop.

Select an option

Save kwindla/874a880e444073aa3c1ad84801d8842e to your computer and use it in GitHub Desktop.
# GPT-5 TTFT (time to first token) demo
# export OPENAI_API_KEY=sk-proj-...
# uv run test-gpt5.py "Write a four-line poem about LLMs."
# For a complete, single-file conversational voice agent
# example using GPT-5, see this gist:
# https://gist.github.com/kwindla/678ea297d12e24b928b636db928226fb
# /// script
# dependencies = ["openai"]
# ///
import time
from openai import OpenAI
import sys
default_prompt = "Write a short bedtime story about a unicorn."


def consume_text_deltas(events, t_start, out=None, clock=time.perf_counter):
    """Echo streamed text deltas and measure the time to the first token.

    Args:
        events: Iterable of streaming events. Events whose ``type`` is
            ``"response.output_text.delta"`` carry a text chunk in ``delta``;
            every other event type is ignored.
        t_start: ``clock()`` reading taken just before the request was sent.
        out: Text stream that receives each chunk as it arrives. Defaults to
            ``sys.stdout`` (resolved at call time).
        clock: Zero-argument callable returning the current time in seconds.

    Returns:
        A ``(full_text, first_token_time)`` tuple. ``first_token_time`` is
        the number of seconds between ``t_start`` and the first non-empty
        text chunk, or ``None`` if no text was streamed.
    """
    if out is None:
        out = sys.stdout

    chunks = []
    first_token_time = None
    for event in events:
        if event.type != "response.output_text.delta":
            continue
        delta_text = event.delta or ""
        if not delta_text:
            continue
        # Stamp the arrival before echoing so terminal I/O is not counted
        # as part of the model's latency.
        if first_token_time is None:
            first_token_time = clock() - t_start
        print(delta_text, end="", flush=True, file=out)
        chunks.append(delta_text)
    return "".join(chunks), first_token_time


def main():
    """Stream one GPT-5 response for the command-line prompt and report TTFT.

    The prompt is taken from the command-line arguments joined by spaces;
    ``default_prompt`` is used when none are given.
    """
    client = OpenAI()
    prompt = " ".join(sys.argv[1:]).strip() or default_prompt

    # Start the clock before the request is issued so the measurement
    # covers the full round trip up to the first streamed text chunk.
    t_start = time.perf_counter()
    with client.responses.create(
        model="gpt-5",  # or "gpt-5-mini", "gpt-5-nano"
        input=prompt,
        reasoning={"effort": "minimal"},
        text={"verbosity": "low"},
        service_tier="priority",
        stream=True,
    ) as stream:
        full_text, first_token_time = consume_text_deltas(stream, t_start)
    print()  # newline after streaming

    if first_token_time is not None:
        print(f"Time to first token: {first_token_time:.3f} s")
    else:
        print("No tokens were streamed.")

    print("\nAssembled text:\n")
    print(full_text)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment