Skip to content

Instantly share code, notes, and snippets.

@ceberous
Created June 6, 2018 04:48
Show Gist options
  • Select an option

  • Save ceberous/26e19a7acfac48b60b4b31c1be7c588c to your computer and use it in GitHub Desktop.

Select an option

Save ceberous/26e19a7acfac48b60b4b31c1be7c588c to your computer and use it in GitHub Desktop.
springfieldspringfield.co.uk/episode_scripts.php Downloader
const path = require( "path" );
const process = require( "process" );
const fs = require( "fs" );
const cp = require( "child_process" );
/**
 * Locates the global npm module directory by running `npm root -g`.
 *
 * `spawnSync` does not throw when the command cannot be launched or exits
 * with a failure; it reports both on the returned object. Those fields are
 * therefore checked explicitly so that a broken npm invocation is reported
 * as such instead of yielding an empty path.
 *
 * On any failure the error is logged and the process exits with status 1.
 *
 * @returns {string} The trimmed standard output of `npm root -g`.
 */
function get_node_global_path() {
    try {
        const output = cp.spawnSync( "npm" , [ "root" , "-g" ] , { encoding : "utf8" } );
        // Launch failures (e.g. npm not found) are reported via `error`, not thrown.
        if ( output.error ) { throw output.error; }
        // `status` is null when the child was terminated by a signal, which is also a failure.
        if ( output.status !== 0 ) {
            const details = String( output.stderr || "" ).trim();
            throw new Error( "npm root -g exited with status " + output.status + ( details ? ": " + details : "" ) );
        }
        const globalRoot = String( output.stdout || "" ).trim();
        if ( globalRoot === "" ) { throw new Error( "npm root -g produced no output" ); }
        return globalRoot;
    }
    catch( error ) { console.log( error ); process.exit( 1 ); }
}
// Global npm module directory, resolved once at startup via `npm root -g`.
const NodeGlobalPath = get_node_global_path();
// Working directory the script was started from; the output JSON file is written here.
const currentPath = process.cwd();
// Third-party modules are required by path from the global npm directory
// rather than resolved by bare name.
const request = require( path.join( NodeGlobalPath , "request" ) );
const cheerio = require( path.join( NodeGlobalPath , "cheerio" ) );
const pALL = require( path.join( NodeGlobalPath , "p-all" ) );
// Season-listing page; the show's slug is appended as the `tv-show` query value.
const seasons_base_url = "https://www.springfieldspringfield.co.uk/episode_scripts.php?tv-show=";
/**
 * Downloads the season listing page of a show and extracts its episode links.
 *
 * Every `.season-episodes` element becomes one season entry: the text of its
 * first `h3` child is the season id and each `a` child is one episode.
 *
 * @param {string} wShowName - Show slug appended verbatim to `seasons_base_url`.
 * @returns {Promise<Array<{id: string, episodes: Array<{url: string, title: string, scriptHTML: null}>}>>}
 *   Seasons in page order. `url` is the link's `href` exactly as found on the
 *   page, `title` is the trimmed link text and `scriptHTML` starts out as null.
 *   Rejects with the request error, or with an Error when the response status
 *   is outside 2xx or the page cannot be parsed.
 */
function get_seasons( wShowName ) {
    return new Promise( function( resolve , reject ) {
        try {
            request( seasons_base_url + wShowName , function ( err , response , body ) {
                if ( err ) { console.log( err ); reject( err ); return; }
                // This callback runs after the outer try/catch has already exited,
                // so it needs its own guard; otherwise a throw here would be an
                // uncaught exception and the promise would never settle.
                try {
                    const status = response ? response.statusCode : undefined;
                    if ( typeof status === "number" && ( status < 200 || status >= 300 ) ) {
                        reject( new Error( "season list request failed with HTTP status " + status ) );
                        return;
                    }
                    let $;
                    try { $ = cheerio.load( body ); }
                    catch( loadError ) { reject( new Error( "cheerio load failed: " + loadError.message ) ); return; }
                    const wResults = [];
                    $( ".season-episodes" ).each( function () {
                        const season = $( this );
                        const this_season_obj = { id: season.children( "h3" ).first().text() , episodes: [] };
                        season.children( "a" ).each( function () {
                            const link = $( this );
                            this_season_obj.episodes.push( { url: link.attr( "href" ) , title: link.text().trim() , scriptHTML: null } );
                        });
                        wResults.push( this_season_obj );
                    });
                    resolve( wResults );
                }
                catch( error ) { console.log( error ); reject( error ); }
            });
        }
        catch( error ) { console.log( error ); reject( error ); }
    });
}
// Site root; episode hrefs scraped from the season page are appended to it.
const episode_script_base = "https://www.springfieldspringfield.co.uk/";
/**
 * Downloads one episode page and returns the markup of its script.
 *
 * The script is taken as the inner HTML of the first child of the
 * `.episode_script` element.
 *
 * @param {string} wEpisodeURL - Episode path appended verbatim to `episode_script_base`.
 * @returns {Promise<string>} Trimmed inner HTML of the script element.
 *   Rejects with the request error, or with an Error when the response status
 *   is outside 2xx, the page cannot be parsed, or no script element is found.
 */
function get_single_script( wEpisodeURL ) {
    return new Promise( function( resolve , reject ) {
        try {
            request( episode_script_base + wEpisodeURL , function ( err , response , body ) {
                if ( err ) { console.log( err ); reject( err ); return; }
                // This callback runs after the outer try/catch has already exited,
                // so it needs its own guard; otherwise a throw here would be an
                // uncaught exception and the promise would never settle.
                try {
                    const status = response ? response.statusCode : undefined;
                    if ( typeof status === "number" && ( status < 200 || status >= 300 ) ) {
                        reject( new Error( "episode request failed with HTTP status " + status + " for " + wEpisodeURL ) );
                        return;
                    }
                    let $;
                    try { $ = cheerio.load( body ); }
                    catch( loadError ) { reject( new Error( "cheerio load failed: " + loadError.message ) ); return; }
                    const script = $( ".episode_script" ).children().first().html();
                    // html() yields null when the selection is empty; report that
                    // explicitly rather than failing on null.trim().
                    if ( typeof script !== "string" ) {
                        reject( new Error( "no episode script found at " + episode_script_base + wEpisodeURL ) );
                        return;
                    }
                    resolve( script.trim() );
                }
                catch( error ) { console.log( error ); reject( error ); }
            });
        }
        catch( error ) { console.log( error ); reject( error ); }
    });
}
/**
 * Downloads the scripts for a list of episode URLs, at most five at a time.
 *
 * Each URL becomes a deferred task that logs its progress and calls
 * `get_single_script`; the tasks are handed to `pALL` with `concurrency: 5`.
 * The promise returned by `pALL` is awaited directly so that a failed download
 * rejects this function's promise instead of leaving it pending forever.
 *
 * @param {string[]} wEpisodes - Episode URLs as accepted by `get_single_script`.
 * @returns {Promise<string[]>} Whatever `pALL` resolves with for the tasks
 *   (expected to be the script markup in input order).
 *   Rejects, after logging the error, if building or running the tasks fails.
 */
async function promise_all_episode_scripts( wEpisodes ) {
    try {
        const totalS = wEpisodes.length.toString();
        const wActions = wEpisodes.map( ( x , i ) => async () => {
            console.log( "Downloading [ " + ( i + 1 ).toString() + " ] of " + totalS );
            return get_single_script( x );
        });
        return await pALL( wActions , { concurrency: 5 } );
    }
    catch( error ) { console.log( error ); throw error; }
}
// Slug of the show to download; also used as the base name of the output file.
const tv_show_name = "trailer-park-boys";
// Entry point: fetch the season listing, download every episode script, then
// write the combined result to "<tv_show_name>.json" in the working directory.
( async ()=> {
    const seasons = await get_seasons( tv_show_name );
    // Collect references to the episode objects in season/episode order so the
    // downloaded scripts can be matched back to them by position.
    const episodes = [];
    for ( const season of seasons ) {
        for ( const episode of season[ "episodes" ] ) {
            episodes.push( episode );
        }
    }
    const scripts = await promise_all_episode_scripts( episodes.map( ( episode ) => episode[ "url" ] ) );
    episodes.forEach( ( episode , index ) => {
        // The stored url is made absolute only after the download, because
        // get_single_script prefixes the site root itself.
        episode[ "url" ] = episode_script_base + episode[ "url" ];
        episode[ "scriptHTML" ] = scripts[ index ];
    });
    fs.writeFileSync( path.join( currentPath , tv_show_name + ".json" ) , JSON.stringify( seasons , null , 4 ) );
})().catch( ( error ) => {
    // Without this handler a failed download would surface only as an
    // unhandled promise rejection.
    console.log( error );
    process.exit( 1 );
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment