Skip to content

Instantly share code, notes, and snippets.

@abdusco
Created November 7, 2025 18:48
Show Gist options
  • Select an option

  • Save abdusco/118a6a3ab41a0a1d2a5f8813f7895ca1 to your computer and use it in GitHub Desktop.

Select an option

Save abdusco/118a6a3ab41a0a1d2a5f8813f7895ca1 to your computer and use it in GitHub Desktop.
Transcribe YouTube videos
#!/usr/bin/env -S python3
from pathlib import Path
import subprocess
import json
import sys
import tempfile
import argparse
import logging
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line arguments of the transcript extractor.

    Args:
        argv: Argument strings to parse, excluding the program name.
            When ``None`` (the default), argparse falls back to
            ``sys.argv[1:]``, so existing ``parse_args()`` calls behave
            exactly as before.

    Returns:
        A namespace whose ``url`` attribute holds the video URL.
    """
    parser = argparse.ArgumentParser(
        description="Extract transcript from YouTube video",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("url", help="YouTube video URL")
    return parser.parse_args(argv)
def _download_subtitles(url: str, dest: Path) -> None:
    """Run yt-dlp to write the auto-generated json3 subtitles of *url* into *dest*.

    Logs an error and exits with status 1 when yt-dlp is missing or fails,
    instead of surfacing a bare traceback with no diagnostics.
    """
    command = [
        "yt-dlp",
        "--skip-download",
        "--write-auto-subs",
        "--sub-format",
        "json3",
        url,
        "-o",
        # Name the file after the video id rather than the title: the file
        # is only ever located via glob, and long multi-byte titles can
        # exceed the filesystem's filename length limit.
        "%(id)s",
    ]
    try:
        subprocess.run(
            command,
            cwd=dest,
            stdout=subprocess.DEVNULL,
            # Keep stderr so a failure can be reported to the user.
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
            check=True,
        )
    except FileNotFoundError:
        logging.error("yt-dlp executable not found; is it installed and on PATH?")
        sys.exit(1)
    except subprocess.CalledProcessError as err:
        logging.error(
            "yt-dlp failed with exit code %d: %s",
            err.returncode,
            (err.stderr or "").strip(),
        )
        sys.exit(1)


def _transcript_text(data: dict) -> str:
    """Concatenate the ``utf8`` text of every segment of every event in *data*.

    Events without ``segs`` and segments without ``utf8`` are skipped.
    """
    return "".join(
        seg["utf8"]
        for event in data.get("events", [])
        for seg in event.get("segs", [])
        if "utf8" in seg
    )


def main() -> None:
    """Download a video's auto-generated subtitles and print them as plain text.

    The URL is taken from the command line. Subtitles are fetched into a
    temporary directory that is removed when the function returns. Exits
    with status 1 when yt-dlp fails or produces no json3 subtitle file.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
    args = parse_args()

    with tempfile.TemporaryDirectory() as tmpdir:
        temp_dir = Path(tmpdir)
        _download_subtitles(args.url, temp_dir)

        # Sort so the chosen file is deterministic if several were written.
        json_files = sorted(temp_dir.glob("*.json3"))
        if not json_files:
            logging.error("No transcript found")
            sys.exit(1)

        # Read explicitly as UTF-8 rather than the platform default
        # encoding, which would mangle or reject non-ASCII transcripts.
        with json_files[0].open(encoding="utf-8") as f:
            data = json.load(f)

    print(_transcript_text(data))
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment