Skip to content

Instantly share code, notes, and snippets.

@cyberandy
Created November 4, 2025 08:51
Show Gist options
  • Select an option

  • Save cyberandy/7d4bf48e2a7052c69e9e8c4efbce2483 to your computer and use it in GitHub Desktop.

Select an option

Save cyberandy/7d4bf48e2a7052c69e9e8c4efbce2483 to your computer and use it in GitHub Desktop.
ChatGPT - SSE recorder
import asyncio, datetime, os, re, json, nest_asyncio
from playwright.async_api import async_playwright
# Patch asyncio via nest_asyncio so asyncio.run() can be used where an event
# loop may already be running (presumably notebook-style environments).
nest_asyncio.apply()
# Make sure the output directory for captured streams exists before any write.
os.makedirs("streams", exist_ok=True)
# ────────────────────────────────────────────────
# Utility: extract internal entity annotations
# ────────────────────────────────────────────────
def extract_entities_from_stream(text):
    """Parse raw SSE text and return structured entity data.

    Every ``"content_references": [`` occurrence in ``text`` is located and the
    JSON array starting at that bracket is decoded in place. Decoding with a
    real JSON parser (instead of a non-greedy regex up to the first ``]``)
    keeps arrays intact when they contain nested arrays such as ``refs`` or
    strings with brackets in them.

    Args:
        text: Raw body of a captured stream.

    Returns:
        A list of dicts, one per array element that is a dict containing a
        ``matched_text`` key. Each dict has the keys ``matched_text``,
        ``start_idx``, ``end_idx``, ``refs``, ``type`` and ``invalid``;
        absent keys yield ``None`` (``refs`` defaults to ``[]``). Arrays that
        are not valid JSON (e.g. truncated) are skipped.
    """
    decoder = json.JSONDecoder()
    entities = []
    # The lookahead leaves match.end() exactly on the opening bracket, which
    # is where raw_decode must start (it does not skip leading whitespace).
    for match in re.finditer(r'"content_references"\s*:\s*(?=\[)', text):
        try:
            data, _ = decoder.raw_decode(text, match.end())
        except json.JSONDecodeError:
            # Best effort: a malformed or truncated array is ignored.
            continue
        for item in data:
            if isinstance(item, dict) and "matched_text" in item:
                entities.append({
                    "matched_text": item.get("matched_text"),
                    "start_idx": item.get("start_idx"),
                    "end_idx": item.get("end_idx"),
                    "refs": item.get("refs", []),
                    "type": item.get("type"),
                    "invalid": item.get("invalid"),
                })
    return entities
# ────────────────────────────────────────────────
# Intercept conversation streams
# ────────────────────────────────────────────────
# URL fragments identifying conversation endpoints. handle_route records a
# response only when its URL contains one of these fragments and its
# content type includes "text/event-stream".
STREAM_PATHS = (
"backend-api/f/conversation",
"backend-api/conversation",
"backend-anon/f/conversation",
)
async def handle_route(route, request):
    """Intercept a conversation request, record any SSE response, then pass it on.

    The request is performed with ``route.fetch()``. When the URL contains one
    of ``STREAM_PATHS`` and the response content type includes
    ``text/event-stream``, the body is previewed on stdout (first 1500
    characters), written to ``streams/stream_<timestamp>.txt`` and scanned for
    entity annotations, which are saved to a sibling ``_entities.json`` file.
    The fetched response is always handed back to the page via
    ``route.fulfill``.

    Args:
        route: Playwright route object for the intercepted request.
        request: The intercepted request; its ``url`` and ``method`` are read.
    """
    url = request.url
    method = request.method
    response = await route.fetch()
    ct = response.headers.get("content-type", "")
    print(f"🚦 {method} {url}")
    print(f" content-type: {ct}")
    is_stream = any(p in url for p in STREAM_PATHS) and "text/event-stream" in ct
    if is_stream:
        body = (await response.body()).decode(errors="ignore")
        # Microseconds are included so that two streams captured within the
        # same second do not overwrite each other's files.
        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        fname = f"streams/stream_{ts}.txt"
        print("\n══════════════════════════════════════════════════════════")
        print(f"[{ts}] Captured stream → {fname}")
        print(f"URL: {url}")
        print("──────────────────────────────────────────────────────────")
        print(body[:1500])
        print("──────────────────────────────────────────────────────────")
        with open(fname, "w", encoding="utf-8") as f:
            f.write(body)
        # Extract internal entity annotations
        entities = extract_entities_from_stream(body)
        if entities:
            print(f"✅ Found {len(entities)} entity annotations")
            for e in entities[:10]:  # preview first 10
                print(f" - {e}")
            # also save a JSON dump alongside the text
            json_path = f"streams/stream_{ts}_entities.json"
            with open(json_path, "w", encoding="utf-8") as jf:
                json.dump(entities, jf, indent=2)
            print(f"💾 Entities saved → {json_path}")
        else:
            print("⚠️ No entity annotations detected in this stream.")
        print("══════════════════════════════════════════════════════════\n")
    # Always forward the already-fetched response so the page keeps working.
    await route.fulfill(response=response)
# ────────────────────────────────────────────────
# Main
# ────────────────────────────────────────────────
async def main():
    """Launch a headed Chromium, hook the conversation endpoints, and idle.

    Registers ``handle_route`` for each conversation URL pattern, opens
    ChatGPT, prints usage instructions, and then sleeps in an endless loop so
    the interception stays active while the user interacts with the page.
    """
    # Intercept all possible conversation endpoints
    route_patterns = (
        "**/backend-api/f/conversation*",
        "**/backend-api/conversation*",
        "**/backend-anon/f/conversation*",
    )
    instructions = """
🟢 Once logged in:
1️⃣ Click "New chat"
2️⃣ Send an entity-rich prompt, e.g.:
"Compare OpenAI, Anthropic, Mistral AI, and Aleph Alpha
in terms of founders, HQ, and flagship models."
3️⃣ This terminal will display and save both the SSE and the extracted entities.
4️⃣ Check ./streams/ for .txt and .json files.
"""
    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=False)
        context = await browser.new_context()
        for pattern in route_patterns:
            await context.route(pattern, handle_route)
        page = await context.new_page()
        print("🔓 Opening ChatGPT. Log in normally.")
        await page.goto("https://chat.openai.com", wait_until="domcontentloaded")
        print(instructions)
        # Keep the coroutine (and with it the browser session) alive.
        while True:
            await asyncio.sleep(5)
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # main() loops forever by design, so Ctrl+C is the expected way to
        # stop the recorder; exit without dumping a traceback.
        print("\n🛑 Recorder stopped.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment