Last active
November 22, 2023 22:53
-
-
Save HonzaLed/850afa28254a6904f1107e9478608e6f to your computer and use it in GitHub Desktop.
youtube-subtitle-generator.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "gpuType": "T4", | |
| "authorship_tag": "ABX9TyMIkC1UU2amUmjeuP3JAAI6", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/HonzaLed/850afa28254a6904f1107e9478608e6f/youtube-subtitle-generator.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "8C82MmdO_dE5" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# @title Install Whisper and yt-dlp\n", | |
| "# %pip (rather than `! pip`) installs into the environment of the running kernel.\n", | |
| "%pip install whisper-ctranslate2 yt-dlp" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# @title Download YouTube video\n", | |
| "# @markdown Paste the YouTube link here (you can use anything that yt-dlp supports)\n", | |
| "import subprocess\n", | |
| "\n", | |
| "video_url = \"https://youtube.com/watch?v=dQw4w9WgXcQ\" # @param {type: \"string\"}\n", | |
| "\n", | |
| "# Extract only the audio track, converted to MP3, into output.mp3.\n", | |
| "cmd = [\"yt-dlp\", \"-x\", \"--audio-format\", \"mp3\", \"-o\", \"output.mp3\", video_url]\n", | |
| "process = subprocess.run(cmd, capture_output=True)\n", | |
| "\n", | |
| "# errors=\"replace\" keeps unexpected bytes in the tool output from raising UnicodeDecodeError.\n", | |
| "print(process.stdout.decode(errors=\"replace\"))\n", | |
| "\n", | |
| "# Non-empty stderr alone is not a failure (warnings are written there too),\n", | |
| "# so the exit code decides whether the download failed.\n", | |
| "stderr_text = process.stderr.decode(errors=\"replace\")\n", | |
| "if process.returncode != 0:\n", | |
| "    print(\"There was an error:\")\n", | |
| "    print(stderr_text)\n", | |
| "elif stderr_text:\n", | |
| "    print(\"Warnings:\")\n", | |
| "    print(stderr_text)" | |
| ], | |
| "metadata": { | |
| "id": "TsptwN5V_peT" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# @title Generate subtitles using Whisper\n", | |
| "# Runs the whisper-ctranslate2 CLI on output.mp3 (the file the download cell asks yt-dlp to write) with the large-v2 model.\n", | |
| "# NOTE(review): the last cell downloads output.<format>; presumably this command writes those files to the working directory by default - confirm.\n", | |
| "! whisper-ctranslate2 output.mp3 --model large-v2" | |
| ], | |
| "metadata": { | |
| "id": "qudRwisRBlJk" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# @title Download subtitles\n", | |
| "# @markdown You can download:\n", | |
| "# @markdown 1. The plaintext transcript (txt)\n", | |
| "# @markdown 2. Multiple subtitle formats (srt, vtt, tsv)\n", | |
| "# @markdown 3. Whisper's raw JSON output (json)\n", | |
| "\n", | |
| "from pathlib import Path\n", | |
| "\n", | |
| "from google.colab import files\n", | |
| "\n", | |
| "# Named subtitle_format so the built-in format() is not shadowed for the rest of the kernel session.\n", | |
| "subtitle_format = \"srt\" # @param [\"txt\", \"srt\", \"vtt\", \"tsv\", \"json\"]\n", | |
| "\n", | |
| "# Fail early with an actionable message if the transcription step has not produced this file.\n", | |
| "subtitle_path = Path(f\"output.{subtitle_format}\")\n", | |
| "if not subtitle_path.exists():\n", | |
| "    raise FileNotFoundError(f\"{subtitle_path} not found; run the Whisper cell first.\")\n", | |
| "\n", | |
| "files.download(str(subtitle_path))" | |
| ], | |
| "metadata": { | |
| "id": "T-VhTCsHPJoc" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment