Skip to content

Instantly share code, notes, and snippets.

@greensounds
Created August 12, 2019 16:43
Show Gist options
  • Select an option

  • Save greensounds/c9879b88442871ee7337dab9a6154ee0 to your computer and use it in GitHub Desktop.

Select an option

Save greensounds/c9879b88442871ee7337dab9a6154ee0 to your computer and use it in GitHub Desktop.
Code for the Airbnb scraping
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
// Example record with per-listing fields.
// NOTE(review): not referenced anywhere in the visible code — presumably the
// target shape for the data scraped from each listing; confirm before relying on it.
const sample = {
  guests: 1,
  bedrooms: 1,
  beds: 1,
  baths: 1,
  pesosPerNight: 350,
};

// Shared Puppeteer browser handle: assigned in main() and read by
// scrapeHomesIndexPage() to open new tabs.
let browser;
/**
 * Loads a search-results page and collects the listing URLs found on it.
 *
 * Opens a new tab on the module-level `browser`, navigates to `url`, and
 * parses the rendered body HTML with cheerio. Each element matching
 * `[itemprop='url']` contributes its `content` attribute prefixed with
 * `https://`. Elements without a usable `content` attribute are skipped
 * instead of producing "https://undefined".
 *
 * Errors are logged rather than thrown.
 *
 * @param {string} url - Address of the index (search-results) page.
 * @returns {Promise<string[]>} The listing URLs, or an empty array when the
 *   page could not be scraped.
 */
async function scrapeHomesIndexPage(url) {
  let page;
  try {
    page = await browser.newPage();
    await page.goto(url);
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous, so no await is needed.
    const $ = cheerio.load(html);
    const homes = $("[itemprop='url']")
      .map((i, element) => {
        const content = $(element).attr('content');
        // Returning null makes cheerio's map() drop the element.
        return content ? `https://${content}` : null;
      })
      .get();
    return homes;
  } catch (err) {
    console.error(err);
    // Return an empty list so callers can iterate the result unconditionally.
    return [];
  } finally {
    // Close the tab on every path so repeated calls do not accumulate open pages.
    if (page) {
      try {
        await page.close();
      } catch (closeErr) {
        console.error(closeErr);
      }
    }
  }
}
/**
 * Navigates an existing tab to a listing page and extracts the text of the
 * element targeted by the price selector.
 *
 * The page is loaded with `waitUntil: 'networkidle2'`, its body HTML is parsed
 * with cheerio, and the trimmed text of the matched element(s) is logged and
 * returned. Errors are logged rather than thrown.
 *
 * NOTE(review): the selector is a long positional `div > div > ...` chain;
 * presumably it matched the nightly price when written — verify against the
 * current page markup.
 *
 * @param {string} url - Address of the listing page.
 * @param {object} page - Puppeteer page (tab) to reuse for navigation.
 * @returns {Promise<string|undefined>} The matched text (empty string when
 *   nothing matches), or `undefined` when loading or parsing failed.
 */
async function scrapeDescriptionPage(url, page) {
  try {
    await page.goto(url, { waitUntil: 'networkidle2' });
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous, so no await is needed.
    const $ = cheerio.load(html);
    // Take the text content; logging the selection itself only dumps the
    // cheerio wrapper object, not the price.
    const pricePerNight = $("#room > div > div > div > div > div > div > div:nth-child(1) > div > div > div > div > div > div:nth-child(2) > div > div")
      .text()
      .trim();
    console.log(pricePerNight);
    return pricePerNight;
  } catch (err) {
    console.error(err);
    return undefined;
  }
}
/**
 * Entry point: launches a visible (non-headless) browser, scrapes the listing
 * URLs from one search-results page, then visits each listing sequentially in
 * a single reused tab.
 *
 * The browser is closed when the run finishes or fails, so the Chromium
 * process is not left running.
 *
 * @returns {Promise<void>}
 */
async function main() {
  browser = await puppeteer.launch({ headless: false });
  try {
    const descriptionPage = await browser.newPage();
    const homes = await scrapeHomesIndexPage('https://www.airbnb.com/s/Wellington--New-Zealand/homes?refinement_paths%5B%5D=%2Fhomes&query=Wellington%2C%20New%20Zealand&place_id=ChIJy3TpSfyxOG0RcLQTomPvAAo&search_type=pagination&s_tag=MrLkVXK5&section_offset=6&items_offset=18');
    // The index scrape may yield no array when it fails; iterate nothing then.
    const listingUrls = Array.isArray(homes) ? homes : [];
    // Sequential on purpose: every listing is loaded in the same tab.
    for (const listingUrl of listingUrls) {
      await scrapeDescriptionPage(listingUrl, descriptionPage);
    }
    console.log(homes);
  } finally {
    await browser.close();
  }
}

// Surface failures (e.g. the browser failing to launch) instead of leaving an
// unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment