Created
August 12, 2019 16:43
-
-
Save greensounds/c9879b88442871ee7337dab9a6154ee0 to your computer and use it in GitHub Desktop.
Code for the Airbnb scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const puppeteer = require('puppeteer'); | |
| const cheerio = require('cheerio'); | |
/**
 * Sample listing record with guest/room counts and a nightly price.
 * Presumably the shape a scraped home should eventually take (TODO confirm);
 * nothing in the visible code reads it.
 */
const sample = {
  guests: 1,
  bedrooms: 1,
  beds: 1,
  baths: 1,
  pesosPerNight: 350,
};

// Shared Puppeteer browser handle: assigned in main() and read by
// scrapeHomesIndexPage(). It stays undefined until main() has launched it.
let browser;
/**
 * Collects listing URLs from a search-results (index) page.
 *
 * Opens a new tab on the module-level `browser`, navigates to `url`, parses
 * the rendered body with Cheerio and reads the `content` attribute of every
 * `[itemprop='url']` element, prefixing each with "https://".
 *
 * @param {string} url - Address of the index page to load.
 * @returns {Promise<string[]>} Listing URLs in document order. Resolves to an
 *   empty array when navigation or parsing fails (the error is logged), so
 *   callers can always iterate the result.
 */
async function scrapeHomesIndexPage(url) {
  let page;
  try {
    page = await browser.newPage();
    await page.goto(url);
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous; no await needed.
    const $ = cheerio.load(html);
    const contents = $("[itemprop='url']")
      .map((i, element) => $(element).attr('content'))
      .get();
    // Skip elements without a content attribute instead of emitting
    // "https://undefined".
    return contents
      .filter(Boolean)
      .map((content) => `https://${content}`);
  } catch (err) {
    console.error(err);
    return [];
  } finally {
    // Always release the tab opened above, even when scraping failed.
    if (page) {
      try {
        await page.close();
      } catch (closeErr) {
        console.error(closeErr);
      }
    }
  }
}
/**
 * Loads a single listing page in an already-open tab and extracts the text of
 * the nightly-price element.
 *
 * @param {string} url - Address of the listing page.
 * @param {object} page - Puppeteer page (tab) to navigate; it is reused by the
 *   caller and is not closed here.
 * @returns {Promise<string|null>} Trimmed text of the matched price element
 *   (an empty string when the selector matches nothing), or `null` when
 *   navigation or parsing fails (the error is logged).
 */
async function scrapeDescriptionPage(url, page) {
  // NOTE(review): purely positional selector; it will stop matching as soon
  // as the page markup shifts. Prefer a stable attribute once one is known.
  const priceSelector = "#room > div > div > div > div > div > div > div:nth-child(1) > div > div > div > div > div > div:nth-child(2) > div > div";
  try {
    // Wait for the network to settle so client-rendered content is present.
    await page.goto(url, { waitUntil: 'networkidle2' });
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous; no await needed.
    const $ = cheerio.load(html);
    // Log and return the element's text rather than the Cheerio wrapper.
    const pricePerNight = $(priceSelector).text().trim();
    console.log(pricePerNight);
    return pricePerNight;
  } catch (err) {
    console.error(err);
    return null;
  }
}
/**
 * Entry point: launches a visible (non-headless) browser, collects listing
 * URLs from one search-results page, visits each listing in a single reused
 * tab, then logs the collected URLs.
 *
 * The browser is always closed before the returned promise settles, so the
 * Node process can exit. Errors from launching or scraping propagate to the
 * caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // The offset parameter is "&section_offset"; the "&sect" prefix had been
  // decoded as an HTML entity ("§"), corrupting the query string.
  const indexUrl = 'https://www.airbnb.com/s/Wellington--New-Zealand/homes?refinement_paths%5B%5D=%2Fhomes&query=Wellington%2C%20New%20Zealand&place_id=ChIJy3TpSfyxOG0RcLQTomPvAAo&search_type=pagination&s_tag=MrLkVXK5&section_offset=6&items_offset=18';

  browser = await puppeteer.launch({ headless: false });
  try {
    const descriptionPage = await browser.newPage();
    // Tolerate an index scrape that produced no list (e.g. it failed and
    // returned undefined) instead of crashing on iteration.
    const homes = (await scrapeHomesIndexPage(indexUrl)) || [];
    // Sequential on purpose: every listing is loaded in the same tab.
    for (const home of homes) {
      await scrapeDescriptionPage(home, descriptionPage);
    }
    console.log(homes);
  } finally {
    await browser.close();
  }
}
// Start the scraper. The promise is not left floating: any failure is logged
// and the process is flagged with a non-zero exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment