Created
June 6, 2018 04:48
-
-
Save ceberous/26e19a7acfac48b60b4b31c1be7c588c to your computer and use it in GitHub Desktop.
springfieldspringfield.co.uk/episode_scripts.php Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const path = require( "path" ); | |
| const process = require( "process" ); | |
| const fs = require( "fs" ); | |
| const cp = require( "child_process" ); | |
/**
 * Resolves the directory that holds globally installed npm packages by
 * running `npm root -g` synchronously.
 *
 * Any failure (npm cannot be launched, or it exits with a non-zero status)
 * is logged and terminates the process with exit code 1.
 *
 * @returns {string} Trimmed stdout of `npm root -g`.
 */
function get_node_global_path() {
	try {
		const output = cp.spawnSync( "npm" , [ "root" , "-g" ] , {
			encoding : "utf8" ,
			// On Windows npm is a .cmd shim, which can only be launched through a shell.
			shell : process.platform === "win32"
		});
		// spawnSync does not throw when the child cannot be started; it reports
		// the failure on the result object instead.
		if ( output.error ) { throw output.error; }
		if ( output.status !== 0 ) {
			throw new Error( "npm root -g exited with status " + output.status + ": " + String( output.stderr || "" ).trim() );
		}
		return output.stdout.trim();
	}
	catch( error ) { console.log( error ); process.exit( 1 ); }
}
| const NodeGlobalPath = get_node_global_path(); | |
| const currentPath = process.cwd(); | |
| const request = require( path.join( NodeGlobalPath , "request" ) ); | |
| const cheerio = require( path.join( NodeGlobalPath , "cheerio" ) ); | |
| const pALL = require( path.join( NodeGlobalPath , "p-all" ) ); | |
| const seasons_base_url = "https://www.springfieldspringfield.co.uk/episode_scripts.php?tv-show="; | |
/**
 * Downloads the episode listing page of a show and extracts its seasons.
 *
 * Every `.season-episodes` element becomes one season: the text of its first
 * `h3` child is the season id and each direct `a` child is one episode.
 *
 * @param {string} wShowName Show slug appended to `seasons_base_url`.
 * @returns {Promise<Array<{id: string, episodes: Array<{url: string, title: string, scriptHTML: null}>}>>}
 *   Resolves with the seasons in page order; `url` is the raw `href`
 *   attribute and `scriptHTML` is a `null` placeholder. Rejects with the
 *   request error, with `Error( "cheerio load failed" )`, or with whatever
 *   error was thrown while walking the document.
 */
function get_seasons( wShowName ) {
	return new Promise( function( resolve , reject ) {
		try {
			request( seasons_base_url + wShowName , function ( err , response , body ) {
				if ( err ) { console.log( err ); reject( err ); return; }
				let $;
				try { $ = cheerio.load( body ); }
				catch( loadError ) { reject( new Error( "cheerio load failed" ) ); return; }
				// This callback runs asynchronously, so the try/catch around
				// request() cannot see errors thrown here; without a local
				// handler they would crash the process instead of rejecting.
				try {
					const wResults = [];
					$( ".season-episodes" ).each( function () {
						const season_id = $( $( this ).children( "h3" )[ 0 ] ).text();
						const this_season_obj = { id: season_id , episodes: [] };
						$( this ).children( "a" ).each( function () {
							const link = $( this );
							this_season_obj.episodes.push({
								url: link.attr( "href" ) ,
								title: link.text().trim() ,
								scriptHTML: null
							});
						});
						wResults.push( this_season_obj );
					});
					resolve( wResults );
				}
				catch( error ) { console.log( error ); reject( error ); }
			});
		}
		catch( error ) { console.log( error ); reject( error ); }
	});
}
// Site root; episode hrefs taken from the listing page are appended to it.
const episode_script_base = "https://www.springfieldspringfield.co.uk/";

/**
 * Downloads one episode page and extracts the markup of its script.
 *
 * The script is the inner HTML of the first child of the `.episode_script`
 * element, with surrounding whitespace trimmed.
 *
 * @param {string} wEpisodeURL Episode path appended to `episode_script_base`.
 * @returns {Promise<string>} Resolves with the trimmed script HTML. Rejects
 *   with the request error, with `Error( "cheerio load failed" )`, or with an
 *   Error naming the URL when the page contains no script element.
 */
function get_single_script( wEpisodeURL ) {
	return new Promise( function( resolve , reject ) {
		try {
			const wFullURL = episode_script_base + wEpisodeURL;
			request( wFullURL , function ( err , response , body ) {
				if ( err ) { console.log( err ); reject( err ); return; }
				let $;
				try { $ = cheerio.load( body ); }
				catch( loadError ) { reject( new Error( "cheerio load failed" ) ); return; }
				// This callback runs asynchronously, so the try/catch around
				// request() cannot see errors thrown here; they must be
				// turned into a rejection locally.
				try {
					const scriptHTML = $( $( ".episode_script" ).children()[ 0 ] ).html();
					// .html() yields null when nothing matched (for example an
					// error page), which previously blew up on .trim().
					if ( typeof scriptHTML !== "string" ) {
						reject( new Error( "no episode script found at " + wFullURL ) );
						return;
					}
					resolve( scriptHTML.trim() );
				}
				catch( error ) { console.log( error ); reject( error ); }
			});
		}
		catch( error ) { console.log( error ); reject( error ); }
	});
}
/**
 * Downloads the scripts of many episodes with bounded concurrency.
 *
 * Each download is announced on stdout as "Downloading [ i ] of total" when
 * it starts.
 *
 * @param {string[]} wEpisodes Episode paths, each passed to `get_single_script`.
 * @param {number} [wConcurrency=5] Maximum number of downloads in flight.
 * @returns {Promise<string[]>} Resolves with the scripts in the same order as
 *   `wEpisodes`. Rejects (after logging) as soon as one download fails;
 *   previously such a failure left the returned promise pending forever.
 */
async function promise_all_episode_scripts( wEpisodes , wConcurrency = 5 ) {
	try {
		const totalS = wEpisodes.length.toString();
		const wActions = wEpisodes.map( ( wEpisodeURL , i ) => () => {
			console.log( "Downloading [ " + ( i + 1 ).toString() + " ] of " + totalS );
			return get_single_script( wEpisodeURL );
		});
		// Awaited inside the try so that a rejected download reaches the catch below.
		return await pALL( wActions , { concurrency: wConcurrency } );
	}
	catch( error ) { console.log( error ); throw error; }
}
// Show slug as it appears in the site's tv-show query parameter. It can be
// overridden with the first command-line argument; the default is unchanged.
const tv_show_name = process.argv[ 2 ] || "trailer-park-boys";

// Entry point: list the seasons, download every episode script, then write
// everything to "<show>.json" in the current working directory.
( async ()=> {
	try {
		const seasons = await get_seasons( tv_show_name );

		// Flatten the seasons into one episode list so all downloads can be
		// scheduled together; the objects are shared with `seasons`.
		const wAllEpisodes = [];
		for ( const season of seasons ) {
			for ( const episode of season[ "episodes" ] ) {
				wAllEpisodes.push( episode );
			}
		}

		const wScripts = await promise_all_episode_scripts( wAllEpisodes.map( ( episode ) => episode[ "url" ] ) );

		// Results come back in request order, so index i belongs to episode i.
		wAllEpisodes.forEach( ( episode , i ) => {
			episode[ "url" ] = episode_script_base + episode[ "url" ];
			episode[ "scriptHTML" ] = wScripts[ i ];
		});

		fs.writeFileSync( path.join( currentPath , tv_show_name + ".json" ) , JSON.stringify( seasons , null , 4 ) );
	}
	catch( error ) {
		// Without this handler a failed download surfaced only as an
		// unhandled promise rejection.
		console.log( error );
		process.exitCode = 1;
	}
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment