Created
September 15, 2024 20:30
-
-
Save seqis/ab6ea9be431d50cd24e6da9e825de3e1 to your computer and use it in GitHub Desktop.
This script takes a YouTube URL from the clipboard, retrieves the video’s metadata (including title, uploader, description, tags, and chapters), and fetches the transcript if available. The combined information is then formatted and automatically copied to the clipboard for easy use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| This Python script takes a YouTube URL from the clipboard, fetches detailed video metadata | |
| and the transcript (if available), and formats it for easy pasting. | |
| The extracted information includes: | |
| - Video Title | |
| - Uploader Name | |
| - Description | |
| - Tags | |
| - Chapters (with timestamps) | |
| - Full Transcript (if available) | |
| The formatted output is automatically copied to the clipboard for quick use. | |
| This script requires yt-dlp to fetch metadata and youtube-transcript-api to extract the transcript. | |
| Dependencies: | |
| - yt-dlp (for fetching video metadata) | |
| - youtube-transcript-api (for retrieving the transcript) | |
| - pyperclip (for clipboard interaction) | |
| """ | |
| import pyperclip | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import yt_dlp | |
| import re | |
| import tkinter as tk | |
| # Function to extract the video ID from the URL | |
def get_video_id(url):
    """Extract the YouTube video ID from a URL.

    Supports the query-parameter form (``...watch?v=ID``), the short-link
    form (``youtu.be/ID``) and path-style forms (``/shorts/ID``,
    ``/embed/ID``, ``/live/ID``).

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID as a string, or ``None`` when no ID can be found.
    """
    # Query-parameter form. Requiring "?" or "&" directly before "v=" keeps
    # parameters whose names merely end in "v" from being mistaken for it.
    match = re.search(r"(?<=[?&]v=)[^&#]+", url)
    if match:
        return match.group(0)
    # Path form. Stop at "?" and "/" as well, so a trailing query string such
    # as "?t=10" is not swallowed into the ID.
    match = re.search(r"(?:youtu\.be|/(?:shorts|embed|live))/([^?&#/]+)", url)
    return match.group(1) if match else None
| # Fetch transcript using YouTubeTranscriptApi | |
def fetch_transcript(video_id):
    """Return the transcript of a video as newline-separated text.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        The ``text`` field of every transcript entry, joined with newlines.

    Raises:
        Exception: If the transcript cannot be retrieved. The underlying
            error is attached as ``__cause__`` so the original traceback
            is preserved.
    """
    try:
        # NOTE(review): get_transcript is a static entry point of
        # youtube-transcript-api; newer releases may expose a different
        # API - confirm against the installed version.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return "\n".join(entry['text'] for entry in transcript)
    except Exception as e:
        raise Exception(f"Error fetching transcript: {e}") from e
| # Fetch video metadata using yt-dlp | |
def fetch_video_metadata(url):
    """Fetch title, uploader, description, tags and chapters for a video.

    Uses yt-dlp in metadata-only mode (``download=False``).

    Args:
        url: The video URL to hand to yt-dlp.

    Returns:
        A dict with the keys ``'title'``, ``'uploader'``, ``'description'``
        (strings) and ``'tags'``, ``'chapters'`` (lists, possibly empty).

    Raises:
        Exception: If extraction fails. The underlying error is attached
            as ``__cause__``.
    """
    try:
        with yt_dlp.YoutubeDL({}) as ydl:
            info_dict = ydl.extract_info(url, download=False)
        # Use `or` rather than a .get() default: a key that is present with
        # the value None bypasses the default, and None would later break
        # callers that join the tags or iterate the chapters.
        return {
            'title': info_dict.get('title') or 'No title available',
            'uploader': info_dict.get('uploader') or 'No uploader available',
            'description': info_dict.get('description') or 'No description available',
            'tags': info_dict.get('tags') or [],
            'chapters': info_dict.get('chapters') or [],
        }
    except Exception as e:
        raise Exception(f"Error fetching metadata: {e}") from e
| # Function to format chapters | |
def format_chapters(chapters):
    """Render a list of chapter dicts as one ``<start>s: <title>`` line each.

    Args:
        chapters: A sequence of dicts that may carry ``'start_time'`` and
            ``'title'`` keys, or a falsy value when there are no chapters.

    Returns:
        The chapter lines joined with newlines, or the string
        ``"No chapters available"`` when ``chapters`` is empty or falsy.
    """
    if chapters:
        lines = [
            f"{item.get('start_time', 0)}s: {item.get('title', 'No title')}"
            for item in chapters
        ]
        return "\n".join(lines)
    return "No chapters available"
| # Function to display errors in a Tkinter window | |
def show_error(error_message):
    """Show an error message in a Tkinter window with a copy button.

    The window contains a read-only text area holding ``error_message`` and
    a "Copy to Clipboard" button that places the message on the clipboard
    via pyperclip. The call blocks in the Tk main loop.

    Args:
        error_message: The text to display.
    """
    window = tk.Tk()
    window.title("Script Error")

    # Insert the message first, then disable the widget so it is read-only.
    message_box = tk.Text(window, wrap=tk.WORD, height=10, width=50)
    message_box.insert(tk.END, error_message)
    message_box.config(state=tk.DISABLED)
    message_box.pack()

    copy_button = tk.Button(
        window,
        text="Copy to Clipboard",
        command=lambda: pyperclip.copy(error_message),
    )
    copy_button.pack()

    window.mainloop()
| # Main function to process the YouTube URL | |
def main():
    """Turn the YouTube URL on the clipboard into a formatted text summary.

    Reads the clipboard, validates that it holds a YouTube URL, fetches the
    video's metadata and transcript, and copies the combined text back to
    the clipboard. A missing transcript does not abort the run: the summary
    is still produced with a placeholder in the transcript section. Any
    other failure is reported through ``show_error``.
    """
    try:
        url = pyperclip.paste().strip()
        # Accept the bare domain and the mobile/music subdomains as well as
        # "www."; the scheme stays optional.
        if not re.match(
            r'^(https?://)?((www|m|music)\.)?(youtube\.com|youtu\.be)/.+$', url
        ):
            raise ValueError("Clipboard content is not a valid YouTube URL.")

        video_id = get_video_id(url)
        if not video_id:
            raise ValueError("Could not extract video ID from the URL.")

        # The transcript is optional: many videos have none, and that should
        # not prevent the metadata from being copied.
        try:
            transcript = fetch_transcript(video_id)
        except Exception as e:
            print(f"Transcript unavailable, continuing without it: {e}")
            transcript = "No transcript available"

        metadata = fetch_video_metadata(url)
        # Guard against a None tag list so the join below cannot raise.
        tags = metadata['tags'] or []

        formatted_output = (
            f"Title: {metadata['title']}\n"
            f"Uploader: {metadata['uploader']}\n"
            f"Description: {metadata['description']}\n\n"
            f"Tags: {', '.join(tags)}\n\n"
            f"Chapters:\n{format_chapters(metadata['chapters'])}\n\n"
            f"Transcript:\n{transcript}"
        )

        pyperclip.copy(formatted_output)
        print("Metadata and transcript copied to clipboard.")
    except Exception as e:
        # Top-level boundary: surface every failure to the user in a window.
        show_error(str(e))
# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment