Forked from HonzaLed/youtube-subtitle-generator.ipynb
Created
November 22, 2023 22:53
-
-
Save superman-enamy/7b613e1425f255e52f64a8e7201d62f8 to your computer and use it in GitHub Desktop.
youtube-subtitle-generator.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "gpuType": "T4", | |
| "authorship_tag": "ABX9TyMIkC1UU2amUmjeuP3JAAI6", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/HonzaLed/850afa28254a6904f1107e9478608e6f/youtube-subtitle-generator.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "8C82MmdO_dE5" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# @title Install Whisper and yt-dlp\n", | |
| "! pip install whisper-ctranslate2 yt-dlp" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# @title Download YouTube video\n", | |
| "# @markdown Paste the YouTube link here (you can use anything that yt-dlp supports)\n", | |
| "import subprocess\n", | |
| "\n", | |
| "video_url = \"https://youtube.com/watch?v=dQw4w9WgXcQ\" # @param {type: \"string\"}\n", | |
| "\n", | |
| "cmd = [\"yt-dlp\", \"-x\", \"--audio-format\", \"mp3\", \"-o\", \"output.mp3\", video_url]\n", | |
| "process = subprocess.run(cmd, capture_output=True)\n", | |
| "\n", | |
| "print(process.stdout.decode())\n", | |
| "\n", | |
| "if not len(process.stderr) == 0:\n", | |
| " print(\"There was an error:\")\n", | |
| " print(process.stderr.decode())" | |
| ], | |
| "metadata": { | |
| "id": "TsptwN5V_peT" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# @title Generate subtitles using Whisper\n", | |
| "! whisper-ctranslate2 output.mp3 --model large-v2" | |
| ], | |
| "metadata": { | |
| "id": "qudRwisRBlJk" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# @title Download subtitles\n", | |
| "# @markdown You can download:\n", | |
| "# @markdown 1. The plaintext transcript (txt)\n", | |
| "# @markdown 2. Multiple subtitles formats (srt, vtt, tsv)\n", | |
| "# @markdown 3. Whisper format? (JSON)\n", | |
| "\n", | |
| "from google.colab import files\n", | |
| "\n", | |
| "format = \"srt\" # @param [\"txt\", \"srt\", \"vtt\", \"tsv\", \"json\"]\n", | |
| "\n", | |
| "files.download(f\"output.{format}\")" | |
| ], | |
| "metadata": { | |
| "id": "T-VhTCsHPJoc" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment