Skip to content

Instantly share code, notes, and snippets.

@Technoash
Created December 1, 2016 05:49
Show Gist options
  • Select an option

  • Save Technoash/3ccd88944ebf93c7ed51e145d64f68b4 to your computer and use it in GitHub Desktop.

Select an option

Save Technoash/3ccd88944ebf93c7ed51e145d64f68b4 to your computer and use it in GitHub Desktop.
var request = require('request');
var cheerio = require('cheerio');
// Start the scrape as soon as the script is loaded. `scrape` is a function
// declaration further down the file, so hoisting makes it callable here.
scrape();
// Reference for the comma-grouped number patterns used below:
// http://stackoverflow.com/questions/5917082/regular-expression-to-match-numbers-with-or-without-commas-and-decimals-in-text
/**
 * Parse the text that precedes the search term inside the
 * "#s-result-count" element of an Amazon search-results page.
 *
 * Two shapes are recognised; both must end in "results for " (including the
 * trailing space):
 *   "1-16 of 2,345 results for "  -> paginated listing
 *   "12 results for "             -> all results fit on a single page
 *
 * Numbers may use "," as a thousands separator; separators are stripped from
 * the returned values, which are kept as digit strings.
 *
 * @param {string} countString Text to parse.
 * @returns {?{pageStart: ?string, pageEnd: ?string, totalResults: string}}
 *     null when the text matches neither shape. `pageStart` and `pageEnd`
 *     are null for the single-page shape.
 */
function parseResultCount(countString) {
    var PAGED = /^(\d+(?:,\d{3})*)-(\d+(?:,\d{3})*) of (\d+(?:,\d{3})*) results for $/;
    var SINGLE = /^(\d+(?:,\d{3})*) results for $/;

    function stripCommas(digits) {
        return digits.replace(/,/g, '');
    }

    var paged = countString.match(PAGED);
    if (paged !== null) {
        return {
            pageStart: stripCommas(paged[1]),
            pageEnd: stripCommas(paged[2]),
            totalResults: stripCommas(paged[3])
        };
    }

    var single = countString.match(SINGLE);
    if (single !== null) {
        return {
            pageStart: null,
            pageEnd: null,
            totalResults: stripCommas(single[1])
        };
    }

    return null;
}

/**
 * Log the raw result-count text followed by the numbers parsed from it.
 * Text in an unrecognised format is reported on stderr instead of throwing.
 *
 * @param {string} countString Text taken from the "#s-result-count" element.
 */
function logResultCount(countString) {
    console.log(countString);

    var counts = parseResultCount(countString);
    if (counts === null) {
        console.error('scrape: unrecognised result count text: ' + JSON.stringify(countString));
        return;
    }

    if (counts.pageStart === null) {
        // Single page: only the total is available.
        console.log(counts.totalResults);
        return;
    }

    console.log('currPageIDStart', counts.pageStart);
    console.log('currPageIDEnd', counts.pageEnd);
    // The third number in "A-B of N results for" is the number of results,
    // not of pages, so it is labelled as such.
    console.log('totalResults', counts.totalResults);
}

/**
 * Fetch one Amazon search-results page for an exact-phrase keyword search and
 * log the result count plus the `data-attribute` attribute of every result's
 * ".s-access-title" element.
 *
 * All output goes to the console; nothing is returned. Request failures and
 * unexpected page content are reported with console.error rather than thrown.
 *
 * @param {string} [keywords='beats'] Search phrase. It is URL-encoded and
 *     wrapped in %22 (encoded double quotes).
 * @param {number|string} [page=3] Value of the `page` query parameter.
 */
function scrape(keywords, page) {
    // The defaults reproduce the search this script originally hard-coded.
    var searchTerm = keywords === undefined ? 'beats' : keywords;
    var pageNumber = page === undefined ? 3 : page;
    var url = 'https://www.amazon.com/s/?keywords=%22' + encodeURIComponent(searchTerm) +
        '%22&page=' + encodeURIComponent(pageNumber);

    request(url, function (error, response, html) {
        if (error) {
            console.error('scrape: request failed:', error);
            return;
        }
        console.log("got page");

        var $ = cheerio.load(html);

        // Runs once per matched "#s-result-count" element, so the titles are
        // only listed when that element is present on the page.
        $('#s-result-count').each(function () {
            // The count is the first bare text node (nodeType 3) of the element.
            var textNode = $(this).contents().filter(function () {
                return this.nodeType === 3;
            })[0];

            if (textNode) {
                logResultCount(textNode.data);
            } else {
                console.error('scrape: #s-result-count contains no text node');
            }

            $(".s-result-item").each(function () {
                console.log($(this).find(".s-access-title").attr('data-attribute'));
            });
        });
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment