Skip to content

Instantly share code, notes, and snippets.

@cj-praveen
Created March 12, 2026 04:05
Show Gist options
  • Select an option

  • Save cj-praveen/1ffc67e7a02b053bff7999ba48415866 to your computer and use it in GitHub Desktop.

Select an option

Save cj-praveen/1ffc67e7a02b053bff7999ba48415866 to your computer and use it in GitHub Desktop.
/**
 * Fetches the caption cues (transcript) of a YouTube video.
 *
 * Steps performed:
 *   1. Load the watch page and extract the `INNERTUBE_API_KEY` from its HTML.
 *   2. POST to the InnerTube player endpoint (identifying as an ANDROID
 *      client) to obtain the list of caption tracks.
 *   3. Pick a caption track (see "Track selection" below).
 *   4. Download the track's XML and parse every `<text>` cue.
 *
 * Track selection, in order of preference:
 *   1. a track in `lang` that has no `kind` (manual captions),
 *   2. any track in `lang` (falls back to auto-generated subtitles),
 *   3. any track without a `kind`, in whatever language,
 *   4. the first track in the list.
 *
 * @param {string} videoId - YouTube video ID.
 * @param {string} [lang="en"] - Preferred language code.
 * @returns {Promise<Array<{start: number, duration: number, text: string}>>}
 *   One entry per cue. `start` and `duration` are the numeric values of the
 *   cue's `start` and `dur` attributes; `duration` is 0 when `dur` is absent.
 * @throws {Error} If a request returns a non-2xx status, the API key cannot be
 *   found in the watch page, the video has no caption tracks, or the selected
 *   track has no download URL.
 */
async function getYouTubeTranscript(videoId, lang = "en") {
  const WATCH_URL = "https://www.youtube.com/watch?v=";
  const PLAYER_API = "https://www.youtube.com/youtubei/v1/player?key=";
  const CLIENT_CONTEXT = {
    client: {
      clientName: "ANDROID",
      clientVersion: "20.10.38"
    }
  };

  // Matches one cue element; attributes are parsed separately so that
  // attribute order, extra attributes, or a missing `dur` do not drop the cue.
  const CUE_PATTERN = /<text\b([^>]*)>([^<]*)<\/text>/g;
  const START_ATTR = /\bstart="([^"]+)"/;
  const DUR_ATTR = /\bdur="([^"]+)"/;

  const NAMED_ENTITIES = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"]
  ]);

  // encodeURIComponent leaves ordinary IDs/keys (letters, digits, "-", "_")
  // untouched while keeping unexpected characters from altering the query.
  const watchResponse = await fetchOk(
    WATCH_URL + encodeURIComponent(videoId),
    undefined,
    "watch page"
  );
  const html = await watchResponse.text();

  const apiKeyMatch = html.match(/"INNERTUBE_API_KEY":"([^"]+)"/);
  if (!apiKeyMatch) {
    throw new Error("Failed to extract YouTube API key");
  }
  const apiKey = apiKeyMatch[1];

  const playerResponse = await fetchOk(
    PLAYER_API + encodeURIComponent(apiKey),
    {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        context: CLIENT_CONTEXT,
        videoId
      })
    },
    "player data"
  );
  const playerData = await playerResponse.json();

  const captionTracks =
    playerData?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
  if (!captionTracks || captionTracks.length === 0) {
    throw new Error("No captions available for this video");
  }

  const selectedTrack =
    captionTracks.find(t => t.languageCode === lang && !t.kind) ||
    captionTracks.find(t => t.languageCode === lang) ||
    captionTracks.find(t => !t.kind) ||
    captionTracks[0];
  if (typeof selectedTrack.baseUrl !== "string") {
    throw new Error("Selected caption track has no download URL");
  }

  // Drop the srv3 format selector so the `<text start dur>` layout that
  // CUE_PATTERN expects is requested instead.
  const transcriptUrl = selectedTrack.baseUrl.replace("&fmt=srv3", "");
  const transcriptResponse = await fetchOk(transcriptUrl, undefined, "transcript");
  const xml = await transcriptResponse.text();

  const cues = [];
  for (const [, attributes, rawText] of xml.matchAll(CUE_PATTERN)) {
    const start = attributes.match(START_ATTR)?.[1];
    if (start === undefined) {
      continue; // a cue without a start time cannot be placed on the timeline
    }
    const dur = attributes.match(DUR_ATTR)?.[1];
    cues.push({
      start: Number(start),
      duration: dur === undefined ? 0 : Number(dur),
      text: decodeHtml(rawText)
    });
  }
  return cues;

  /** Performs a fetch and rejects with a descriptive Error on a non-2xx status. */
  async function fetchOk(url, init, description) {
    const response = await fetch(url, init);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${description} (HTTP ${response.status})`);
    }
    return response;
  }

  /**
   * Decodes cue text through two entity layers: the XML escaping of the
   * document, then any HTML entities embedded in the caption itself (so
   * `&amp;#39;` becomes `'` and `&amp;amp;` becomes `&`).
   */
  function decodeHtml(text) {
    return decodeEntitiesOnce(decodeEntitiesOnce(text));
  }

  /**
   * Decodes one layer of entities in a single left-to-right pass, so the
   * output of one replacement is never re-scanned within the same pass.
   * Unknown names and out-of-range code points are left untouched.
   */
  function decodeEntitiesOnce(text) {
    return text.replace(
      /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g,
      (entity, body) => {
        if (body[0] !== "#") {
          return NAMED_ENTITIES.get(body) ?? entity;
        }
        const isHex = body[1] === "x" || body[1] === "X";
        const codePoint = Number.parseInt(
          body.slice(isHex ? 2 : 1),
          isHex ? 16 : 10
        );
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
    );
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment