Created
November 4, 2025 08:51
-
-
Save cyberandy/7d4bf48e2a7052c69e9e8c4efbce2483 to your computer and use it in GitHub Desktop.
ChatGPT - SSE recorder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import asyncio, datetime, os, re, json, nest_asyncio | |
| from playwright.async_api import async_playwright | |
# Patch asyncio so that event loops may be nested; presumably this lets the
# asyncio.run() call at the bottom work where a loop is already running
# (e.g. a notebook) -- TODO confirm the intended environment.
nest_asyncio.apply()
# Create the output directory for captured streams; no error if it exists.
os.makedirs("streams", exist_ok=True)
| # ──────────────────────────────────────────────── | |
| # Utility: extract internal entity annotations | |
| # ──────────────────────────────────────────────── | |
def extract_entities_from_stream(text):
    """Parse raw SSE text and return structured entity data.

    Scans *text* for every ``"content_references": [...]`` array, decodes the
    array as JSON and collects each dict item that has a ``matched_text`` key.

    Args:
        text: Raw body of a captured stream. Empty or ``None`` yields ``[]``.

    Returns:
        A list of dicts with the keys ``matched_text``, ``start_idx``,
        ``end_idx``, ``refs``, ``type`` and ``invalid``. Fields missing from
        an item are ``None`` (``refs`` defaults to ``[]``). Arrays that cannot
        be decoded (e.g. truncated streams) are skipped.
    """
    if not text:
        return []

    # Only the key and the opening bracket are located with a regex. The array
    # itself is decoded by the JSON parser, because a pattern such as
    # ``\[(.*?)\]`` stops at the first "]" and therefore breaks on nested
    # lists (e.g. "refs": []) or on a "]" inside a string value.
    key_pattern = re.compile(r'"content_references"\s*:\s*(?=\[)')
    decoder = json.JSONDecoder()

    entities = []
    pos = 0
    match = key_pattern.search(text, pos)
    while match is not None:
        try:
            # raw_decode parses one JSON value starting at the given index and
            # reports where it ended, ignoring whatever follows the array.
            data, pos = decoder.raw_decode(text, match.end())
        except ValueError:
            # Best effort: malformed or truncated array. Resume the search
            # right after the key instead of aborting the whole extraction.
            data = []
            pos = match.end()

        for item in data:
            if isinstance(item, dict) and "matched_text" in item:
                entities.append({
                    "matched_text": item.get("matched_text"),
                    "start_idx": item.get("start_idx"),
                    "end_idx": item.get("end_idx"),
                    "refs": item.get("refs", []),
                    "type": item.get("type"),
                    "invalid": item.get("invalid"),
                })

        match = key_pattern.search(text, pos)
    return entities
| # ──────────────────────────────────────────────── | |
| # Intercept conversation streams | |
| # ──────────────────────────────────────────────── | |
# URL fragments that mark a request as a conversation endpoint whose
# event-stream response should be recorded.
STREAM_PATHS = (
    "backend-api/f/conversation",
    "backend-api/conversation",
    "backend-anon/f/conversation",
)


def _record_stream(url, body):
    """Preview *body* on the terminal, save it under streams/ and dump its entities."""
    now = datetime.datetime.now()
    ts = now.strftime("%Y%m%d_%H%M%S")
    # Microseconds keep two streams captured within the same second from
    # overwriting each other's files.
    stem = f"streams/stream_{ts}_{now:%f}"
    fname = f"{stem}.txt"

    print("\n══════════════════════════════════════════════════════════")
    print(f"[{ts}] Captured stream → {fname}")
    print(f"URL: {url}")
    print("──────────────────────────────────────────────────────────")
    print(body[:1500])
    print("──────────────────────────────────────────────────────────")
    with open(fname, "w", encoding="utf-8") as f:
        f.write(body)

    # Extract internal entity annotations
    entities = extract_entities_from_stream(body)
    if entities:
        print(f"✅ Found {len(entities)} entity annotations")
        for e in entities[:10]:  # preview first 10
            print(f" - {e}")
        # also save a JSON dump alongside the text
        json_path = f"{stem}_entities.json"
        with open(json_path, "w", encoding="utf-8") as jf:
            json.dump(entities, jf, indent=2)
        print(f"💾 Entities saved → {json_path}")
    else:
        print("⚠️ No entity annotations detected in this stream.")
    print("══════════════════════════════════════════════════════════\n")


async def handle_route(route, request):
    """Fetch an intercepted request, record it if it is a conversation stream, then fulfil it.

    Every intercepted request is fetched and logged (method, URL,
    content-type). When the URL contains one of ``STREAM_PATHS`` and the
    response content-type includes ``text/event-stream``, the body is saved
    and scanned for entity annotations. The fetched response is always handed
    back to the page.

    Args:
        route: The intercepted route; used to fetch and fulfil the request.
        request: The intercepted request; its ``url`` and ``method`` are read.
    """
    url = request.url
    method = request.method
    response = await route.fetch()
    ct = response.headers.get("content-type", "")
    print(f"🚦 {method} {url}")
    print(f" content-type: {ct}")

    is_stream = "text/event-stream" in ct and any(p in url for p in STREAM_PATHS)
    if is_stream:
        try:
            body = (await response.body()).decode(errors="ignore")
            _record_stream(url, body)
        except Exception as exc:
            # Recording is best effort: a failure while decoding or saving
            # must not prevent the response from being delivered to the page.
            print(f"❌ Failed to record stream from {url}: {exc!r}")

    await route.fulfill(response=response)
| # ──────────────────────────────────────────────── | |
| # Main | |
| # ──────────────────────────────────────────────── | |
async def main():
    """Open ChatGPT in a visible Chromium window and record conversation streams.

    Registers ``handle_route`` for each conversation endpoint pattern on a
    fresh browser context, navigates to the site, prints usage instructions
    and then idles indefinitely so the handler keeps running.
    """
    # Intercept all possible conversation endpoints
    conversation_patterns = (
        "**/backend-api/f/conversation*",
        "**/backend-api/conversation*",
        "**/backend-anon/f/conversation*",
    )

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context()
        for pattern in conversation_patterns:
            await context.route(pattern, handle_route)

        page = await context.new_page()
        print("🔓 Opening ChatGPT. Log in normally.")
        await page.goto("https://chat.openai.com", wait_until="domcontentloaded")
        print("""
🟢 Once logged in:
1️⃣ Click "New chat"
2️⃣ Send an entity-rich prompt, e.g.:
"Compare OpenAI, Anthropic, Mistral AI, and Aleph Alpha
in terms of founders, HQ, and flagship models."
3️⃣ This terminal will display and save both the SSE and the extracted entities.
4️⃣ Check ./streams/ for .txt and .json files.
""")

        # Keep the event loop (and therefore the route handler) alive.
        while True:
            await asyncio.sleep(5)


if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment