Script to download audio clips from https://kg3.qq.com.
It tries to infer the filename from the page title and falls back to using the song code.
This worked on January 8th, 2020; good luck getting it to work in the future!
| const https = require('https'); | |
| const http = require('http'); | |
| const fs = require('fs'); | |
// Inclusive index window into `urls`/`codes` that is processed in one run.
// Recommend only doing ~4-5 at a time max.
const i_start = 0;
const i_end = 4;

// Full song page URLs to fetch, one per clip.
const urls = [
];

// Song codes, parallel to `urls`: the part after https://kg3.qq.com/node/play?s=
// Used for log messages and as the fallback filename.
const codes = [
];

// Raw page bodies, filled in by index as the responses arrive.
const data = [];

// Captures the stream URL embedded in the page. The host dots are escaped so
// that only a real `<2-3 chars>.stream.kg.qq.com` host matches, not any
// string that merely has arbitrary characters in those positions.
// Both patterns are global: reset `lastIndex` before every exec().
const playurl_regex = /playurl":"(http:\/\/[A-Za-z0-9-]{2,3}\.stream\.kg\.qq\.com\/szkge-btfs\/[A-Za-z0-9?=&_.]+)"/g;

// Captures the song title out of the page's <title> element.
const title_regex = /<title>(.*)- 全民K歌,KTV交友社区<\/title>/g;
/**
 * Saves either a remote resource or an already-fetched page body to disk.
 *
 * @param {?string} src - URL to download over plain HTTP. Ignored when
 *   `writeRaw` is given; may be null in that case.
 * @param {string} dest - Path of the file to write.
 * @param {string} [writeRaw] - Raw HTML to store instead of downloading.
 *   Used when the stream URL could not be extracted from the page.
 * @returns {undefined} Progress and failures are reported on the console
 *   only; a failed download never throws into the caller.
 */
function download_file(src, dest, writeRaw) {
  // Also take the raw path when there is nothing to download, so an empty
  // page body does not fall through into http.get(null).
  if (writeRaw || src == null) {
    console.log('Saving raw HTML instead; find the player url yourself');
    try {
      // The payload is a single in-memory page, so a synchronous write is
      // cheap and reports a failure right where it happens.
      fs.writeFileSync(dest, writeRaw || '');
    } catch (err) {
      console.error(`Failed to write ${dest}: ${err.message}`);
    }
    return;
  }

  console.log(`Starting download of ${dest}`);
  http.get(src, (res) => {
    if (res.statusCode !== 200) {
      console.error(`Download of ${dest} failed: HTTP ${res.statusCode}`);
      res.resume(); // drain the body so the socket is released
      return;
    }

    // Create the file only once there is a real payload to store.
    const file = fs.createWriteStream(dest);
    let failed = false;
    const fail = (err) => {
      if (failed) return;
      failed = true;
      console.error(`Download of ${dest} failed: ${err.message}`);
      res.destroy();
      file.destroy();
      // Best-effort removal of the partial file; nothing useful to do if it fails.
      fs.unlink(dest, () => {});
    };

    res.on('error', fail);
    file.on('error', fail);
    file.on('finish', () => {
      console.log(`Finished download of ${dest}`);
    });
    // pipe() respects backpressure and ends the file when the response ends.
    res.pipe(file);
  }).on('error', (err) => {
    // Connection-level failures (DNS, refused, reset) arrive here.
    console.error(`Download of ${dest} failed: ${err.message}`);
  });
}
// Driver: for every song page inside the [i_start, i_end] window, fetch the
// HTML, derive a filename from its <title>, extract the stream URL and hand
// both to download_file(). If no stream URL is found, the raw page is saved
// as <code>.html so the URL can be located by hand.
urls.forEach((this_song, i) => {
  if (i < i_start || i > i_end) return;

  // Fall back to an index-based id so a missing code never yields "undefined.m4a".
  const this_song_id = codes[i] || `song_${i}`;

  // Assign by index rather than push(): with i_start > 0 a push would land in
  // a different slot and `data[i] += c` would start from undefined.
  data[i] = '';

  https.get(this_song, {
    headers: {
      'User-Agent': 'curl/7.64.0' // surprisingly, cURL as a user agent works...
    }
  }, (res) => {
    console.log(`${this_song_id} response code: ${res.statusCode}`);

    // Decode as UTF-8 at the stream level; concatenating raw Buffer chunks
    // corrupts multi-byte characters that straddle a chunk boundary.
    res.setEncoding('utf8');
    res.on('data', (c) => {
      data[i] += c;
    });

    res.on('end', () => {
      const page = data[i];

      // The regexes are global, so reset lastIndex before each use.
      title_regex.lastIndex = 0;
      const title_match = title_regex.exec(page);
      // The title comes from the remote page: trim it and replace characters
      // that are path separators or invalid in filenames.
      let filename = title_match
        ? title_match[1].trim().replace(/[\x00-\x1f\\/:*?"<>|]/g, '_')
        : '';
      if (!filename) {
        console.log(`for some reason ${this_song_id} failed to parse filename... oops!`);
        filename = this_song_id;
      }
      console.log(`${this_song_id} is ${filename}.m4a`);

      playurl_regex.lastIndex = 0;
      const url_match = playurl_regex.exec(page);
      if (url_match) {
        download_file(url_match[1], `${filename}.m4a`);
      } else {
        console.log(`for some reason ${this_song_id} failed to parse download url... oops!`);
        download_file(null, `${this_song_id}.html`, page);
      }
    });
  }).on('error', (e) => {
    console.error(this_song + ': ' + e);
  });
});