-
-
Save lepinkainen/cce44bcfe5ac8526e1ee4950a77de9a1 to your computer and use it in GitHub Desktop.
Generic web content summariser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "llm", | |
| # "llm-fragments-youtube", | |
| # "llm-fragments-reader", | |
| # "llm-fragments-github", | |
| # "llm-hacker-news", | |
| # "rich", | |
| # ] | |
| # /// | |
| """ | |
| Unified question-answer tool for web content. | |
| Automatically detects content type (YouTube, TikTok, Hacker News, GitHub repositories, web articles) | |
| and routes to the appropriate llm fragment provider with tailored prompts. | |
| Usage: | |
| q <url> [question] | |
| Examples: | |
| q https://youtube.com/watch?v=... | |
| q https://news.ycombinator.com/item?id=123456 | |
| q https://example.com/article "What are the main points?" | |
| """ | |
| import sys | |
| import re | |
| import llm | |
| from rich.console import Console | |
| from rich.markdown import Markdown | |
| from rich.panel import Panel | |
# Built-in prompts, one per content type. Each is sent to the model as-is
# when the user does not supply a question of their own.
ARTICLE_PROMPT = (
    "Summarize this article in a clear, well-formatted way for reading in a terminal. "
    "Use markdown formatting with bullet points where appropriate. "
    "Include the main ideas and key points."
)

YOUTUBE_PROMPT = (
    "Summarize this video in a clear, well-formatted way for reading in a terminal. "
    "Use markdown formatting with bullet points where appropriate. "
    "Include the main topics and key takeaways."
)

# Assembled line by line so the requirements and the output format are easy
# to scan and edit individually.
HN_PROMPT = "\n".join(
    (
        "Summarize the Hacker News discussion.",
        "Requirements:",
        "- Focus on the main technical and factual points raised by commenters.",
        "- Capture points of disagreement or controversy.",
        '- Include 3-6 short, illustrative direct quotes (verbatim), attributed generically (e.g. "one commenter").',
        "- Avoid fluff, meta commentary, or praise.",
        "- Do not invent facts or quotes.",
        "Output format:",
        "- 3-6 bullet points for the summary",
        '- Then a "Notable quotes" section with the quotes and relevant context.',
    )
)
def detect_content_type(url: str) -> tuple[str, str]:
    """
    Detect content type from a URL and return the fragment type and default prompt.

    The decision is made on the URL's hostname rather than on a substring of
    the whole URL, so a provider name that merely appears in a path or query
    string (e.g. ``https://example.com/?ref=youtube.com``) does not hijack
    the routing. Subdomains such as ``www.`` or ``m.`` are accepted for the
    video sites. Only ``github.com`` / ``www.github.com`` are routed to the
    GitHub loader; other hosts, including ``gist.github.com``, fall through
    to the generic reader.

    Args:
        url: The URL given on the command line; the scheme may be omitted
            (e.g. ``github.com/owner/repo``).

    Returns:
        tuple[str, str]: (fragment_type, default_prompt), where fragment_type
        is one of 'youtube', 'hn', 'github' or 'reader'.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
        if not parts.netloc:
            # Scheme-less input is parsed as a bare path; re-parse it as a
            # network-path reference so the host can be recovered.
            parts = urlsplit(f"//{url}")
        host = (parts.hostname or "").rstrip(".")
        path = parts.path
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): let the generic
        # reader deal with it instead of failing during detection.
        return ("reader", ARTICLE_PROMPT)

    def host_is(domain: str) -> bool:
        """Return True when host is the domain itself or one of its subdomains."""
        return host == domain or host.endswith(f".{domain}")

    # YouTube, and TikTok which is served by the same 'youtube' fragment type
    if host_is("youtube.com") or host_is("youtu.be") or host_is("tiktok.com"):
        return ("youtube", YOUTUBE_PROMPT)

    # Hacker News item (discussion) pages
    if host == "news.ycombinator.com" and path.startswith("/item"):
        return ("hn", HN_PROMPT)

    # GitHub repositories
    if host in ("github.com", "www.github.com"):
        return ("github", ARTICLE_PROMPT)

    # Default: web article via reader
    return ("reader", ARTICLE_PROMPT)
def build_fragment_arg(content_type: str, url: str) -> str:
    """
    Construct the fragment argument for the llm fragment loader.

    Args:
        content_type: Type of content ('youtube', 'hn', 'github', 'reader')
        url: The URL to process

    Returns:
        str: Fragment argument in format 'type:identifier'. For 'youtube' and
        'reader' the identifier is the URL itself, for 'hn' it is the numeric
        item id, and for 'github' it is 'owner/repo'.

    Raises:
        ValueError: If the content type is unknown, or the HN item id or
            GitHub owner/repo cannot be extracted from the URL.
    """
    # These loaders take the URL unchanged.
    if content_type in ("youtube", "reader"):
        return f"{content_type}:{url}"

    if content_type == "hn":
        # Anchor on '?' or '&' so only the real 'id' query parameter matches,
        # not the tail of another parameter name such as 'pid='.
        found = re.search(r"[?&]id=(\d+)", url)
        if found is None:
            raise ValueError(f"Could not extract HN item id from URL: {url}")
        return f"hn:{found.group(1)}"

    if content_type == "github":
        # Supports https://github.com/owner/repo and github.com/owner/repo,
        # with or without trailing path segments. Query strings and
        # fragments are excluded so they never end up in the repo name.
        found = re.search(r"github\.com/([^/?#\s]+)/([^/?#\s]+)", url)
        if found is None:
            raise ValueError(f"Could not extract GitHub repo from URL: {url}")
        owner, repo = found.groups()
        # Clone-style URLs end in '.git'; drop it unless nothing would remain.
        repo = repo.removesuffix(".git") or repo
        return f"github:{owner}/{repo}"

    raise ValueError(f"Unknown content type: {content_type}")
def main():
    """Main entry point for the CLI.

    Reads the URL from ``sys.argv[1]`` and an optional question from the
    remaining arguments (joined with spaces). It detects the content type,
    loads the matching llm fragment, sends the question (or the content
    type's default prompt) to the default llm model, and renders the reply
    as Markdown on stdout.

    Diagnostics go to stderr. The process exits with status 1 on a usage
    error, a missing fragment loader or any other failure, and with status
    130 when interrupted with Ctrl-C.
    """
    # Local import: rich is already a dependency of this script, and keeping
    # the import here makes the function self-contained.
    from rich.markup import escape

    console = Console(width=100)
    err_console = Console(stderr=True, width=100)

    if len(sys.argv) < 2:
        # "[question]" has to be escaped, otherwise rich treats it as a
        # markup tag and silently drops it from the usage line.
        err_console.print(r"[red]usage:[/red] q <url> \[question]")
        err_console.print("\n[yellow]Examples:[/yellow]")
        err_console.print(" q https://youtube.com/watch?v=...")
        err_console.print(" q https://news.ycombinator.com/item?id=123456")
        err_console.print(
            " q https://example.com/article 'What are the main points?'"
        )
        sys.exit(1)

    url = sys.argv[1]
    # Empty when no question was given; the default prompt is used then.
    question = " ".join(sys.argv[2:])

    try:
        # Detect content type and get default prompt
        content_type, default_prompt = detect_content_type(url)

        # Use custom question if provided, otherwise use default
        prompt = question or default_prompt

        # Build fragment argument
        fragment_arg = build_fragment_arg(content_type, url)

        # TikTok shares the 'youtube' fragment type, so its label has to be
        # derived from the URL; only do so when that type was detected.
        if content_type == "youtube" and "tiktok.com" in url:
            content_type_label = "🎥 TikTok"
        else:
            content_type_label = {
                "youtube": "🎥 YouTube",
                "hn": "🗨️ Hacker News",
                "github": "🐙 GitHub",
                "reader": "📄 Article",
            }.get(content_type, content_type)

        # Show what we're processing. The URL is user input and may contain
        # square brackets, so escape it before embedding it in markup.
        console.print(
            Panel(
                f"[cyan]{escape(url)}[/cyan]",
                title=f"[bold]{content_type_label}[/bold]",
                border_style="blue",
            )
        )
        console.print()

        # Load plugins to access fragment loaders
        llm.load_plugins()
        loaders = llm.get_fragment_loaders()

        # Split 'type:identifier' on the first colon only; the identifier
        # may itself contain colons (it can be a full URL).
        fragment_type, _, fragment_id = fragment_arg.partition(":")

        if fragment_type not in loaders:
            err_console.print(
                f"[red]Error:[/red] Fragment loader '{fragment_type}' not found"
            )
            err_console.print(f"Available loaders: {', '.join(loaders)}")
            sys.exit(1)

        # Load the fragment; some loaders (like github) return a list
        fragment = loaders[fragment_type](fragment_id)
        fragments = fragment if isinstance(fragment, list) else [fragment]

        # Get default model and execute prompt with fragment
        model = llm.get_model(llm.get_default_model())
        response = model.prompt(prompt, fragments=fragments)

        # Collect the whole response first, then render it as one
        # Markdown document.
        full_response = "".join(response)
        console.print(Markdown(full_response))
        console.print()  # Final newline

    except ValueError as e:
        error_msg = str(e)
        # Friendlier message when the error text carries an HTTP 451 status
        # (presumably raised by a fragment loader's HTTP fetch).
        if ": 451" in error_msg:
            err_console.print(
                "[red]Error:[/red] Content unavailable (HTTP 451 - blocked for legal reasons)"
            )
        else:
            # Escape so brackets in the message cannot be parsed as markup.
            err_console.print(f"[red]Error:[/red] {escape(error_msg)}")
        sys.exit(1)
    except KeyboardInterrupt:
        err_console.print("\n[yellow]Interrupted[/yellow]")
        sys.exit(130)
    except Exception as e:
        # Top-level boundary: report any other failure and exit non-zero.
        err_console.print(f"[red]Error:[/red] {escape(str(e))}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment