Last active
December 4, 2024 15:14
-
-
Save PragatiVerma18/e1f3b48204a816ef55269311c9dbbc53 to your computer and use it in GitHub Desktop.
How to Scrape Products from a Page with infinite scroll via "Load more" button
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape products from a "Load more" paginated page and save them to CSV.

Clicks the "Load more" button until it disappears, parses the fully loaded
page with BeautifulSoup, and writes name/price/link/image for every product.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
)
from bs4 import BeautifulSoup
import time
import csv

# Set up the WebDriver (ChromeDriver must be installed and on PATH).
driver = webdriver.Chrome()
try:
    # Open the page
    driver.get("https://www.scrapingcourse.com/button-click")

    # Click "Load more" until the button is gone (all products loaded).
    while True:
        try:
            load_more_button = driver.find_element(By.ID, "load-more-btn")
            load_more_button.click()
            # Give the newly loaded products time to render (adjust as needed).
            time.sleep(2)
        except (NoSuchElementException, ElementNotInteractableException):
            # Narrow catch: only "button gone/unclickable" ends the loop;
            # any other failure (e.g. a crashed driver) still surfaces.
            print("No more products to load.")
            break

    # Snapshot the fully loaded page before closing the browser.
    html_content = driver.page_source
finally:
    # Always release the browser, even if scraping raised.
    driver.quit()

# Parse the page content with BeautifulSoup.
soup = BeautifulSoup(html_content, 'html.parser')

# Extract product details from every product card in the grid.
products = []
for product in soup.find_all('div', class_='product-item'):
    name_tag = product.find('span', class_='product-name')
    price_tag = product.find('span', class_='product-price')
    link_tag = product.find('a')
    img_tag = product.find('img')
    # Skip malformed cards instead of crashing on a missing tag (None).
    if not (name_tag and price_tag and link_tag and img_tag):
        continue
    products.append({
        'name': name_tag.get_text(strip=True),
        'price': price_tag.get_text(strip=True),
        'link': link_tag.get('href', ''),
        'image_url': img_tag.get('src', ''),
    })

# Preview the first couple of products (modify the slice to check more).
for product in products[:2]:
    print(f"Name: {product['name']}")
    print(f"Price: {product['price']}")
    print(f"Link: {product['link']}")
    print(f"Image URL: {product['image_url']}")
    print('-' * 30)

# Persist everything; explicit utf-8 avoids platform-default encoding crashes.
with open("products.csv", mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=["name", "image_url", "price", "link"])
    writer.writeheader()
    writer.writerows(products)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape all products, then enrich the 5 most expensive with detail-page data.

Loads every product via the "Load more" button, sorts by numeric price,
visits the top-5 product pages for description/SKU, and writes one CSV row
per product (description/SKU blank for products outside the top 5).
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
)
from bs4 import BeautifulSoup
import time
import csv

# Set up the WebDriver (ChromeDriver must be installed and on PATH).
driver = webdriver.Chrome()
try:
    driver.get("https://www.scrapingcourse.com/button-click")

    # Click "Load more" until the button disappears (all products loaded).
    while True:
        try:
            driver.find_element(By.ID, "load-more-btn").click()
            time.sleep(2)  # let the new batch render
        except (NoSuchElementException, ElementNotInteractableException):
            # Only "button gone/unclickable" ends the loop; other errors surface.
            print("No more products to load.")
            break

    # Parse the fully loaded listing page.
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    products = []
    for product in soup.find_all('div', class_='product-item'):
        name_tag = product.find('span', class_='product-name')
        price_tag = product.find('span', class_='product-price')
        link_tag = product.find('a')
        img_tag = product.find('img')
        # Skip malformed cards instead of crashing on a missing tag (None).
        if not (name_tag and price_tag and link_tag and img_tag):
            continue
        price = price_tag.get_text(strip=True)
        try:
            # Numeric price for sorting; tolerate "$1,299.00"-style text.
            price_float = float(price.replace('$', '').replace(',', '').strip())
        except ValueError:
            price_float = 0.0  # unparsable price sorts last rather than aborting
        products.append({
            'name': name_tag.get_text(strip=True),
            'price': price,
            'price_float': price_float,  # numeric key used only for sorting
            'link': link_tag.get('href', ''),
            'image_url': img_tag.get('src', ''),
        })

    # Sort by price, most expensive first, and take the top 5.
    sorted_products = sorted(products, key=lambda p: p['price_float'], reverse=True)
    top_5_products = sorted_products[:5]

    # Visit each top-5 product page and pull description + SKU.
    for product in top_5_products:
        driver.get(product['link'])
        time.sleep(3)  # wait for the detail page to load
        product_page_soup = BeautifulSoup(driver.page_source, 'html.parser')
        description = product_page_soup.find('div', class_='product-description')
        product['description'] = (
            description.get_text(strip=True) if description else 'No description available'
        )
        sku = product_page_soup.find('span', class_='sku')
        product['sku'] = sku.get_text(strip=True) if sku else 'No SKU available'
finally:
    # Release the browser even if any page load or parse raised.
    driver.quit()

# Print the extracted details for the top 5 products.
for product in top_5_products:
    print(f"Name: {product['name']}")
    print(f"Price: {product['price']}")
    print(f"Link: {product['link']}")
    print(f"Image URL: {product['image_url']}")
    print(f"Description: {product['description']}")
    print(f"SKU: {product['sku']}")
    print('-' * 30)

# Write every product; explicit utf-8 avoids platform-default encoding crashes.
with open("products.csv", mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(
        file,
        fieldnames=["name", "image_url", "price", "link", "description", "sku"],
    )
    writer.writeheader()
    for product in products:
        writer.writerow({
            'name': product['name'],
            'image_url': product['image_url'],
            'price': product['price'],
            'link': product['link'],
            # Only the top 5 were enriched; the rest get empty fields.
            'description': product.get('description', ''),
            'sku': product.get('sku', ''),
        })
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

# Persist the scraped products; explicit utf-8 keeps non-ASCII product names
# from crashing on platforms whose default encoding is not UTF-8.
with open("products.csv", mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=["name", "image_url", "price", "link"])
    writer.writeheader()
    # writerows replaces the manual per-row loop.
    writer.writerows(products)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests

# URL of the demo page with products
url = "https://www.scrapingcourse.com/button-click"

# Send a GET request; a timeout prevents the script from hanging forever
# on a stalled or unreachable server.
response = requests.get(url, timeout=10)

# Check if the request was successful
if response.status_code == 200:
    html_content = response.text
    print(html_content)  # Optional: preview the HTML
else:
    print(f"Failed to retrieve content: {response.status_code}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sort products by numeric price, most expensive first.
# Strip '$' AND thousands separators so "$1,299.00" parses — consistent with
# the main scraper's price handling.
sorted_products = sorted(
    products,
    key=lambda p: float(p['price'].replace('$', '').replace(',', '').strip()),
    reverse=True,
)

# Scrape description and SKU for the top 5 products.
driver = webdriver.Chrome()
try:
    for product in sorted_products[:5]:
        driver.get(product['link'])
        time.sleep(3)  # wait for the detail page to render
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        description = soup.find('div', class_='product-description')
        product['description'] = (
            description.get_text(strip=True) if description else "No description"
        )
        sku = soup.find('span', class_='sku')
        product['sku'] = sku.get_text(strip=True) if sku else "No SKU"
finally:
    # Release the browser even if a page load or parse fails mid-loop.
    driver.quit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Click "Load more" until all products are loaded, then capture the HTML."""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
)
import time

# Set up the WebDriver (ChromeDriver must be installed and on PATH).
driver = webdriver.Chrome()
try:
    driver.get("https://www.scrapingcourse.com/button-click")

    # Click "Load more" until the button disappears (end of products).
    while True:
        try:
            driver.find_element(By.ID, "load-more-btn").click()
            # Wait for the new content to load (adjust time as necessary).
            time.sleep(2)
        except (NoSuchElementException, ElementNotInteractableException):
            # Narrow catch: only a missing/unclickable button ends the loop;
            # any other failure (e.g. crashed driver) still surfaces.
            print("No more products to load.")
            break

    # Capture the fully loaded page before closing the browser.
    html_content = driver.page_source
finally:
    # Always close the browser, even if navigation or clicking raised.
    driver.quit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup

# Parse the fully loaded page content (html_content comes from the
# Selenium step) and pull each product card's fields.
soup = BeautifulSoup(html_content, 'html.parser')

products = []
for item in soup.find_all('div', class_='product-item'):
    name_tag = item.find('span', class_='product-name')
    price_tag = item.find('span', class_='product-price')
    link_tag = item.find('a')
    img_tag = item.find('img')
    # Guard against malformed cards: .find() returns None when a tag is
    # missing, and calling get_text()/indexing on None would raise.
    if not (name_tag and price_tag and link_tag and img_tag):
        continue
    products.append({
        'name': name_tag.get_text(strip=True),
        'price': price_tag.get_text(strip=True),
        'link': link_tag.get('href', ''),
        'image_url': img_tag.get('src', ''),
    })

# Preview the first two products.
for product in products[:2]:
    print(f"Name: {product['name']}")
    print(f"Price: {product['price']}")
    print(f"Link: {product['link']}")
    print(f"Image URL: {product['image_url']}")
    print('-' * 30)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver

# Smoke test: open the demo page and print its title.
driver = webdriver.Chrome()  # Ensure ChromeDriver is installed and in PATH
try:
    driver.get("https://www.scrapingcourse.com/button-click")
    print(driver.title)
finally:
    # Close the browser even if the page load raises (avoids a leaked process).
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment