Created
December 1, 2016 05:49
-
-
Save Technoash/3ccd88944ebf93c7ed51e145d64f68b4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party modules used by scrape(): `request` fetches the page for a URL and
// `cheerio` loads the returned HTML so it can be queried with CSS selectors.
| var request = require('request'); | |
| var cheerio = require('cheerio'); | |
// Run the scraper as soon as the script is loaded. `scrape` is a function
// declaration further down, so it is hoisted and already callable here.
| scrape(); | |
| // Reference for the comma-grouped number regex used in scrape(): http://stackoverflow.com/questions/5917082/regular-expression-to-match-numbers-with-or-without-commas-and-decimals-in-text | |
/**
 * Parses the leading text of the "#s-result-count" element of a search page.
 *
 * Two shapes are recognised (surrounding whitespace is ignored, and both
 * "result" and "results" are accepted):
 *   - "<start>-<end> of <total> results for"  (results spread over several pages)
 *   - "<total> results for"                   (everything fits on one page)
 *
 * Numbers may be plain digits or use "," as a thousands separator; the
 * separators are removed from the returned values.
 *
 * @param {string} countString - Text taken from the result-count element.
 * @returns {{start: (string|null), end: (string|null), total: string}|null}
 *   Digit strings for the matched numbers (`start`/`end` are null for the
 *   single-page shape), or null when the text matches neither shape.
 */
function parseResultCount(countString){
    if(typeof countString !== 'string'){
        return null;
    }
    // One pattern covers both shapes: the "<start>-<end> of " prefix is optional.
    var match = countString.match(/^\s*(?:(\d{1,3}(?:,\d{3})*|\d+)-(\d{1,3}(?:,\d{3})*|\d+) of )?(\d{1,3}(?:,\d{3})*|\d+) results? for\s*$/);
    if(match === null){
        return null;
    }
    var stripCommas = function(text){
        return text.split(',').join('');
    };
    var hasRange = match[1] !== undefined;
    return {
        start: hasRange ? stripCommas(match[1]) : null,
        end: hasRange ? stripCommas(match[2]) : null,
        total: stripCommas(match[3])
    };
}

/**
 * Fetches one Amazon search-results page and logs what it finds:
 * the raw result-count text, the numbers parsed out of it, and the
 * `data-attribute` value of every ".s-access-title" inside a ".s-result-item".
 *
 * Relies on `request` and `cheerio` being in scope (required at file level).
 * Takes no arguments and returns nothing; all output goes to the console.
 * Failures (request error, missing or unrecognised result-count text) are
 * reported on stderr instead of being ignored or thrown.
 */
function scrape(){
    // Quoted ("%22...%22") keyword search, third page of results.
    var url = 'https://www.amazon.com/s/?keywords=%22' + encodeURIComponent('beats') + '%22&page=3';

    request(url, function(error, response, html){
        if(error){
            console.error('request failed:', error);
            return;
        }
        console.log("got page");
        if(response && response.statusCode !== 200){
            // Keep going: the body is still parsed below, but make the odd status visible.
            console.error('unexpected HTTP status:', response.statusCode);
        }

        var $ = cheerio.load(html);
        $('#s-result-count').each(function(){
            // Only the element's own text nodes (nodeType 3) hold the count;
            // child elements are skipped.
            var textNodes = $(this).contents().filter(function(){
                return this.nodeType === 3;
            });

            if(textNodes.length === 0){
                console.error('result-count element has no text node');
            }
            else{
                var countString = textNodes[0].data;
                console.log(countString);

                var counts = parseResultCount(countString);
                if(counts === null){
                    console.error('unrecognised result-count text');
                }
                else if(counts.start === null){
                    //single page
                    console.log(counts.total);
                }
                else{
                    //multiple pages
                    console.log('currPageIDStart', counts.start);
                    console.log('currPageIDEnd', counts.end);
                    // The label is kept for output compatibility; the value is
                    // the total number of results, not a page count.
                    console.log('totalPages', counts.total);
                }
            }

            $(".s-result-item").each(function(){
                console.log($(this).find(".s-access-title").attr('data-attribute'));
            });
        });
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment