Skip to content

Instantly share code, notes, and snippets.

@lepinkainen
Created January 12, 2026 18:33
Show Gist options
  • Select an option

  • Save lepinkainen/cce44bcfe5ac8526e1ee4950a77de9a1 to your computer and use it in GitHub Desktop.

Select an option

Save lepinkainen/cce44bcfe5ac8526e1ee4950a77de9a1 to your computer and use it in GitHub Desktop.
Generic web content summariser
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "llm",
# "llm-fragments-youtube",
# "llm-fragments-reader",
# "llm-fragments-github",
# "llm-hacker-news",
# "rich",
# ]
# ///
"""
Unified question-answer tool for web content.
Automatically detects content type (YouTube, Hacker News, web articles)
and routes to the appropriate llm fragment provider with tailored prompts.
Usage:
q <url> [question]
Examples:
q https://youtube.com/watch?v=...
q https://news.ycombinator.com/item?id=123456
q https://example.com/article "What are the main points?"
"""
import re
import sys
from urllib.parse import urlsplit

import llm
from rich.console import Console
from rich.markdown import Markdown
from rich.markup import escape
from rich.panel import Panel
# Prompts used when the user does not supply a question of their own.
# One prompt per kind of content; each is sent verbatim to the model.

# Generic web pages (also used for GitHub repositories).
ARTICLE_PROMPT = (
    "Summarize this article in a clear, well-formatted way for reading in a terminal. "
    "Use markdown formatting with bullet points where appropriate. "
    "Include the main ideas and key points."
)

# Video content.
YOUTUBE_PROMPT = (
    "Summarize this video in a clear, well-formatted way for reading in a terminal. "
    "Use markdown formatting with bullet points where appropriate. "
    "Include the main topics and key takeaways."
)

# Hacker News threads: one entry per line of the prompt, joined without a
# trailing newline.
HN_PROMPT = "\n".join(
    (
        "Summarize the Hacker News discussion.",
        "Requirements:",
        "- Focus on the main technical and factual points raised by commenters.",
        "- Capture points of disagreement or controversy.",
        '- Include 3-6 short, illustrative direct quotes (verbatim), attributed generically (e.g. "one commenter").',
        "- Avoid fluff, meta commentary, or praise.",
        "- Do not invent facts or quotes.",
        "Output format:",
        "- 3-6 bullet points for the summary",
        '- Then a "Notable quotes" section with the quotes and relevant context.',
    )
)
def detect_content_type(url: str) -> tuple[str, str]:
    """
    Detect content type from URL and return the fragment type and default prompt.

    Classification is based on the URL's hostname (case-insensitive) rather
    than on a substring search of the whole URL, so a page that merely
    mentions e.g. "youtube.com" in its path or query string is not mistaken
    for a video. URLs without a scheme ("github.com/owner/repo") are accepted.

    Args:
        url: The URL given on the command line.

    Returns:
        tuple[str, str]: (fragment_type, default_prompt), where fragment_type
        is one of 'youtube', 'hn', 'github' or 'reader'.
    """
    candidate = url.strip()
    # urlsplit() only recognises a host when the authority is introduced by
    # "//", so give scheme-less input a network-path prefix.
    if not re.match(r"[a-zA-Z][a-zA-Z0-9+.-]*://", candidate):
        candidate = f"//{candidate}"

    try:
        parts = urlsplit(candidate)
        host = (parts.hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): let the
        # generic reader deal with it instead of failing here.
        return ("reader", ARTICLE_PROMPT)

    def on_site(domain: str) -> bool:
        """Return True if the host is the domain itself or one of its subdomains."""
        return host == domain or host.endswith(f".{domain}")

    # YouTube and TikTok both go through the youtube fragment loader.
    if on_site("youtube.com") or on_site("youtu.be") or on_site("tiktok.com"):
        return ("youtube", YOUTUBE_PROMPT)

    # Hacker News item pages
    if host == "news.ycombinator.com" and parts.path.startswith("/item"):
        return ("hn", HN_PROMPT)

    # GitHub repositories: only the main site (not gist./api. subdomains),
    # and only paths that contain at least owner/repo. Other GitHub pages
    # fall through to the generic reader.
    if host in ("github.com", "www.github.com"):
        segments = [segment for segment in parts.path.split("/") if segment]
        if len(segments) >= 2:
            return ("github", ARTICLE_PROMPT)

    # Default: web article via reader
    return ("reader", ARTICLE_PROMPT)
def build_fragment_arg(content_type: str, url: str) -> str:
    """
    Construct the fragment argument for the llm fragment loader.

    Args:
        content_type: Type of content ('youtube', 'hn', 'github', 'reader')
        url: The URL to process

    Returns:
        str: Fragment argument in format 'type:identifier'

    Raises:
        ValueError: If content_type is unknown, or if the identifier that the
            loader needs (HN item id, GitHub owner/repo) cannot be found in
            the URL.
    """
    # These loaders take the URL itself as their identifier.
    if content_type in ("youtube", "reader"):
        return f"{content_type}:{url}"

    if content_type == "hn":
        # Anchor on '?' or '&' so that only the real "id" query parameter
        # matches, not the tail of another name such as "pid=".
        match = re.search(r"[?&]id=(\d+)", url)
        if match:
            return f"hn:{match.group(1)}"
        raise ValueError(f"Could not extract HN item id from URL: {url}")

    if content_type == "github":
        # Extract owner/repo from the GitHub URL.
        # Supports: https://github.com/owner/repo or github.com/owner/repo,
        # optionally followed by a deeper path, a query string or a fragment.
        # '?' and '#' are excluded so they never leak into the repo name.
        match = re.search(
            r"github\.com/([^/?#\s]+)/([^/?#\s]+)", url, re.IGNORECASE
        )
        if match:
            owner, repo = match.groups()
            # Clone URLs end in ".git"; the repository name does not.
            repo = repo.removesuffix(".git")
            if repo:
                return f"github:{owner}/{repo}"
        raise ValueError(f"Could not extract GitHub repo from URL: {url}")

    raise ValueError(f"Unknown content type: {content_type}")
def main():
    """
    Main entry point for the CLI.

    Reads the URL and optional question from sys.argv, loads the matching
    llm fragment, prompts the default model with it and renders the answer
    as markdown on stdout. Errors are reported on stderr.

    Exit codes: 1 on a usage error or any failure, 130 when interrupted
    with Ctrl-C.
    """
    console = Console(width=100)
    err_console = Console(stderr=True, width=100)

    if len(sys.argv) < 2:
        err_console.print("[red]usage:[/red] q <url> [question]")
        err_console.print("\n[yellow]Examples:[/yellow]")
        err_console.print(" q https://youtube.com/watch?v=...")
        err_console.print(" q https://news.ycombinator.com/item?id=123456")
        err_console.print(
            " q https://example.com/article 'What are the main points?'"
        )
        sys.exit(1)

    url = sys.argv[1]
    # Everything after the URL is the question; empty or whitespace-only
    # input counts as "no question" so the default prompt is used.
    question = " ".join(sys.argv[2:]).strip()

    try:
        # Detect content type and get default prompt
        content_type, default_prompt = detect_content_type(url)

        # Use custom question if provided, otherwise use default
        prompt = question or default_prompt

        # Build fragment argument
        fragment_arg = build_fragment_arg(content_type, url)

        # Determine display label based on URL, not just content_type:
        # TikTok shares the 'youtube' content type but gets its own label.
        # Emoji are written as escapes so the labels survive any re-encoding
        # of this file.
        if "tiktok.com" in url:
            content_type_label = "\U0001F3A5 TikTok"
        else:
            content_type_label = {
                "youtube": "\U0001F3A5 YouTube",
                "hn": "\U0001F5E8\uFE0F Hacker News",
                "github": "\U0001F419 GitHub",
                "reader": "\U0001F4C4 Article",
            }.get(content_type, content_type)

        # Show what we're processing. The URL is user input: escape it so
        # square brackets are printed literally instead of being parsed as
        # rich markup tags.
        console.print(
            Panel(
                f"[cyan]{escape(url)}[/cyan]",
                title=f"[bold]{content_type_label}[/bold]",
                border_style="blue",
            )
        )
        console.print()

        # Load plugins to access fragment loaders
        llm.load_plugins()
        loaders = llm.get_fragment_loaders()

        # Split 'type:identifier' at the first colon only; the identifier
        # may itself contain colons (it can be a full URL).
        fragment_type, _, fragment_id = fragment_arg.partition(":")

        if fragment_type not in loaders:
            err_console.print(
                f"[red]Error:[/red] Fragment loader '{fragment_type}' not found"
            )
            err_console.print(f"Available loaders: {', '.join(loaders)}")
            # SystemExit is not an Exception subclass, so it passes through
            # the handlers below untouched.
            sys.exit(1)

        # Load the fragment
        fragment = loaders[fragment_type](fragment_id)

        # Handle fragment loaders that return lists (like github)
        fragments = fragment if isinstance(fragment, list) else [fragment]

        # Get default model and execute prompt with fragment
        model = llm.get_model(llm.get_default_model())
        response = model.prompt(prompt, fragments=fragments)

        # Iterating the response yields text chunks; collect them all so the
        # markdown is rendered once, as a whole document.
        full_response = "".join(response)

        # Render the markdown with rich
        console.print(Markdown(full_response))
        console.print()  # Final newline
    except ValueError as e:
        error_msg = str(e)
        # Provide friendlier messages for common HTTP errors
        if ": 451" in error_msg:
            err_console.print(
                "[red]Error:[/red] Content unavailable (HTTP 451 - blocked for legal reasons)"
            )
        else:
            # Exception text is arbitrary: escape it so stray brackets cannot
            # break rich's markup parser while reporting the error.
            err_console.print(f"[red]Error:[/red] {escape(error_msg)}")
        sys.exit(1)
    except KeyboardInterrupt:
        err_console.print("\n[yellow]Interrupted[/yellow]")
        sys.exit(130)
    except Exception as e:
        # Top-level boundary: report any other failure and exit non-zero.
        err_console.print(f"[red]Error:[/red] {escape(str(e))}")
        sys.exit(1)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment