Created
July 19, 2024 11:50
-
-
Save vivekthedev/36489fbaf896eb7c06ebb9350dec298a to your computer and use it in GitHub Desktop.
lxml Scraping Tutorial Code - Dynamic Scrape
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape video titles and links from a YouTube channel page into a JSON file.

The page is opened in a Selenium-driven Chrome browser, scrolled a few times,
and the resulting HTML is parsed with lxml.
"""

import json
from time import sleep
from urllib.parse import urljoin

from lxml import html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

URL = "https://www.youtube.com/@freecodecamp/videos"
BASE_URL = "https://www.youtube.com"
OUTPUT_PATH = "videos.json"
SCROLL_COUNT = 4
LOAD_DELAY_SECONDS = 3


def fetch_page_source(url, scrolls=SCROLL_COUNT, delay=LOAD_DELAY_SECONDS):
    """Return the HTML of *url* after pressing END *scrolls* times.

    Args:
        url: Address of the page to open.
        scrolls: How many times the END key is sent to the page.
        delay: Seconds to sleep after the initial load and after each key
            press.

    Returns:
        The browser's current page source as a string.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        sleep(delay)
        parent = driver.find_element(By.TAG_NAME, "html")
        for _ in range(scrolls):
            # Presumably each END press makes the page lazy-load more videos;
            # the pause gives that content time to arrive.
            parent.send_keys(Keys.END)
            sleep(delay)
        return driver.page_source
    finally:
        # quit() ends the whole browser session (close() only closes the
        # current window), and the finally clause runs it even on failure.
        driver.quit()


def extract_videos(page_source):
    """Return a list of ``{"title", "link"}`` dicts parsed from *page_source*.

    Every ``a#video-title-link`` element with an ``href`` contributes one
    entry; anchors without an ``href`` are skipped.
    """
    tree = html.fromstring(page_source)
    videos = []
    for anchor in tree.cssselect("a#video-title-link"):
        href = anchor.get("href")
        if not href:
            # No target to link to; concatenating None would raise TypeError.
            continue
        videos.append(
            {
                "title": anchor.text_content().strip(),
                # urljoin resolves relative hrefs against the site root and
                # leaves already-absolute hrefs untouched.
                "link": urljoin(BASE_URL, href),
            }
        )
    return videos


def main():
    """Scrape the channel at ``URL`` and write the result to ``OUTPUT_PATH``."""
    videos = extract_videos(fetch_page_source(URL))
    with open(OUTPUT_PATH, "w", encoding="utf-8") as file:
        json.dump(videos, file)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment