Created
March 12, 2026 04:05
-
-
Save cj-praveen/1ffc67e7a02b053bff7999ba48415866 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Fetches the caption transcript of a YouTube video.
 *
 * The function performs three HTTP requests:
 *   1. Loads the watch page and extracts the `INNERTUBE_API_KEY` from its HTML.
 *   2. POSTs to the player endpoint (as an ANDROID client) to list caption tracks.
 *   3. Downloads the selected track's XML and parses its `<text>` elements.
 *
 * Track selection order:
 *   • a track in `lang` without a `kind` field (treated as manual captions),
 *   • any track in `lang` (e.g. auto-generated),
 *   • any track without a `kind` field,
 *   • the first available track.
 *
 * @param {string} videoId - YouTube video ID.
 * @param {string} [lang="en"] - Preferred language code.
 * @returns {Promise<Array<{start: number, duration: number, text: string}>>}
 *   One entry per `<text>` element, in document order.
 * @throws {Error} If any request returns a non-OK HTTP status, if the API key
 *   cannot be found in the watch page, or if the video has no caption tracks.
 */
async function getYouTubeTranscript(videoId, lang = "en") {
  const WATCH_URL = "https://www.youtube.com/watch";
  const PLAYER_API = "https://www.youtube.com/youtubei/v1/player";
  const CLIENT_CONTEXT = {
    client: {
      clientName: "ANDROID",
      clientVersion: "20.10.38"
    }
  };

  // Build URLs through URL/searchParams so the values are always escaped.
  const watchUrl = new URL(WATCH_URL);
  watchUrl.searchParams.set("v", videoId);
  const watchResponse = await fetchOk(watchUrl.href, undefined, "watch page");
  const html = await watchResponse.text();

  const apiKeyMatch = html.match(/"INNERTUBE_API_KEY":"([^"]+)"/);
  if (!apiKeyMatch) {
    throw new Error("Failed to extract YouTube API key");
  }
  const apiKey = apiKeyMatch[1];

  const playerUrl = new URL(PLAYER_API);
  playerUrl.searchParams.set("key", apiKey);
  const playerResponse = await fetchOk(
    playerUrl.href,
    {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        context: CLIENT_CONTEXT,
        videoId
      })
    },
    "player data"
  );
  const playerData = await playerResponse.json();

  const captionTracks =
    playerData?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
  if (!captionTracks || captionTracks.length === 0) {
    throw new Error("No captions available for this video");
  }

  const selectedTrack =
    captionTracks.find(t => t.languageCode === lang && !t.kind) ||
    captionTracks.find(t => t.languageCode === lang) ||
    captionTracks.find(t => !t.kind) ||
    captionTracks[0];

  // Drop the srv3 format parameter so the regex below sees plain
  // <text start=".." dur=".."> elements.
  const transcriptUrl = selectedTrack.baseUrl.replace("&fmt=srv3", "");
  const transcriptResponse = await fetchOk(transcriptUrl, undefined, "transcript");
  const xml = await transcriptResponse.text();

  const matches = [
    ...xml.matchAll(/<text start="([^"]+)" dur="([^"]+)">([^<]*)<\/text>/g)
  ];
  return matches.map(m => ({
    start: Number(m[1]),
    duration: Number(m[2]),
    text: decodeHtml(m[3])
  }));

  // Runs fetch and rejects with a descriptive Error on a non-OK HTTP status,
  // instead of letting a failed response surface later as a parse error.
  async function fetchOk(url, init, label) {
    const response = await fetch(url, init);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${label} (HTTP ${response.status})`);
    }
    return response;
  }

  // Decodes the HTML entities handled by this function. `&amp;` is decoded
  // first on purpose, so double-escaped sequences such as `&amp;#39;`
  // collapse all the way down to the literal character.
  function decodeHtml(text) {
    return text
      .replace(/&amp;/g, "&")
      .replace(/&(?:#39|apos);/g, "'")
      .replace(/&quot;/g, '"')
      .replace(/&lt;/g, "<")
      .replace(/&gt;/g, ">");
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment