ugovaretto · December 2, 2025 11:01
diff --git a/extract-links.py b/extract-links.py
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.chrome import ChromeDriverManager

 # Start Chrome using the ChromeDriver binary that webdriver_manager installs.
 service = Service(ChromeDriverManager().install())
 driver = webdriver.Chrome(service=service)

 try:
    # Open the page to scan; replace this with the real target URL.
    url = 'https://example-site.com'
    driver.get(url)

    # Collect every <a> element, read its href, and keep only the values
    # that are present and not blank.
    anchors = driver.find_elements(By.TAG_NAME, 'a')
    hrefs = (anchor.get_attribute('href') for anchor in anchors)
    all_urls = [href for href in hrefs if href and href.strip()]

    # Report the count first, then one URL per line.
    print(f"Found {len(all_urls)} URLs:")
    for found in all_urls:
        print(found)

 except Exception as err:
    print(f"An error occurred: {err}")

 finally:
    # Shut the browser down whether or not the scan succeeded.
    driver.quit()
diff --git a/headless.py b/headless.py
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from markdownify import markdownify as md

 # 1. Set up Chrome options for headless mode
 chrome_options = Options()
 chrome_options.add_argument("--headless=new") # Use the new headless mode for full functionality
 chrome_options.add_argument("--disable-gpu") # Good practice for some systems
 chrome_options.add_argument("--window-size=1920,1080") # Set a default window size for consistent rendering

 # 2. Initialize the WebDriver with the specified options
 # Selenium Manager automatically handles the driver installation/location in modern versions
 driver = webdriver.Chrome(options=chrome_options)

 # Everything that uses the browser runs inside try/finally so the session is
 # released even when an assertion or element lookup fails part-way through.
 try:
    # 3. Run a search on python.org and check that it produced results
    driver.get("http://www.python.org")
    assert "Python" in driver.title
    elem = driver.find_element(By.NAME, "q")
    elem.clear()
    elem.send_keys("pycon")
    elem.send_keys(Keys.RETURN)
    assert "No results found." not in driver.page_source

    # 4. Capture the result page as an image
    driver.save_screenshot("python.png")

    # 5. Get the full page source HTML and convert it to Markdown
    # You can configure markdownify to ignore certain tags if needed (e.g., strip=['script', 'style'])
    html_content = driver.page_source
    markdown_text = md(html_content, heading_style="ATX", strip=['script', 'style', 'noscript'])

    # 6. Save the Markdown text to a file
    filename = "python.md"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(markdown_text)
 finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and could leave the driver process running.
    driver.quit()
diff --git a/save-content.py b/save-content.py
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from markdownify import markdownify as md
 import time

 def save_page_as_markdown(url, filename="webpage.md"):
    """Load *url* in Chrome and write the page, converted to Markdown, to *filename*.

    Any exception raised along the way is caught and printed rather than
    propagated, and the browser is shut down in every case.
    """
    browser = webdriver.Chrome()

    try:
        browser.get(url)

        # Fixed pause so dynamically loaded content has a chance to appear.
        time.sleep(3)

        # Convert the current page source with ATX-style headings, passing
        # script/style/noscript as the tags for markdownify to strip.
        rendered = md(
            browser.page_source,
            heading_style="ATX",
            strip=['script', 'style', 'noscript'],
        )

        with open(filename, "w", encoding="utf-8") as out:
            out.write(rendered)

        print(f"Successfully saved page content from {url} to {filename}")

    except Exception as err:
        print(f"An error occurred: {err}")

    finally:
        # Runs on success and on failure alike.
        browser.quit()

 # --- Example Usage ---
 # Replace with the URL you want to save.
 # The URL must be absolute (scheme included): WebDriver rejects a bare host
 # name such as "en.wikipedia.org" with an invalid-argument error.
 target_url = "https://en.wikipedia.org"
 save_page_as_markdown(target_url, "wiki_page.md")
diff --git a/save-screenshot.py b/save-screenshot.py
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.chrome import ChromeDriverManager
 import time

 # Start Chrome with the ChromeDriver binary that webdriver-manager installs.
 service = Service(ChromeDriverManager().install())
 driver = webdriver.Chrome(service=service)

 try:
    driver.get("https://www.python.org")

    # Short fixed pause before capturing, giving the page time to settle.
    time.sleep(2)

    # Write the screenshot to a PNG file
    # (driver.get_screenshot_as_file(...) is an alternative call).
    screenshot_name = "python_org_screenshot.png"
    driver.save_screenshot(screenshot_name)

    print(f"Screenshot saved as {screenshot_name}")

 except Exception as err:
    print(f"An error occurred: {err}")

 finally:
    # Shut the browser down on success and on failure alike.
    driver.quit()
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.chrome.service import Service
	from webdriver_manager.chrome import ChromeDriverManager

	# 1. Setup the WebDriver (using webdriver_manager for simplicity)
	# This automatically downloads and manages the correct ChromeDriver version
	service = Service(ChromeDriverManager().install())
	driver = webdriver.Chrome(service=service)

	try:
	    # 2. Navigate to the target page
	    url = 'https://example-site.com' # Replace with your target URL
	    driver.get(url)

	    # 3. Find all anchor elements (<a> tags) on the page
	    # The By.TAG_NAME locator finds all elements with the tag 'a'
	    link_elements = driver.find_elements(By.TAG_NAME, 'a')

	    # 4. Extract the 'href' attribute from each element and store in a list
	    all_urls = []
	    for link_element in link_elements:
	        href = link_element.get_attribute('href')
	        if href and href.strip(): # Check if the href attribute is not empty or None
	            all_urls.append(href)

	    # 5. Print the extracted URLs
	    print(f"Found {len(all_urls)} URLs:")
	    for single_url in all_urls:
	        print(single_url)

	except Exception as e:
	    print(f"An error occurred: {e}")

	finally:
	    # 6. Close the browser (runs whether or not an error occurred)
	    driver.quit()
	from selenium import webdriver
	from selenium.webdriver.chrome.options import Options
	from selenium.webdriver.common.by import By
	from selenium.webdriver.common.keys import Keys
	from markdownify import markdownify as md

	# 1. Set up Chrome options for headless mode
	chrome_options = Options()
	chrome_options.add_argument("--headless=new") # Use the new headless mode for full functionality
	chrome_options.add_argument("--disable-gpu") # Good practice for some systems
	chrome_options.add_argument("--window-size=1920,1080") # Set a default window size for consistent rendering

	# 2. Initialize the WebDriver with the specified options
	# Selenium Manager automatically handles the driver installation/location in modern versions
	driver = webdriver.Chrome(options=chrome_options)

	# Everything that uses the browser runs inside try/finally so the session is
	# released even when an assertion or element lookup fails part-way through.
	try:
	    # 3. Run a search on python.org and check that it produced results
	    driver.get("http://www.python.org")
	    assert "Python" in driver.title
	    elem = driver.find_element(By.NAME, "q")
	    elem.clear()
	    elem.send_keys("pycon")
	    elem.send_keys(Keys.RETURN)
	    assert "No results found." not in driver.page_source

	    # 4. Capture the result page as an image
	    driver.save_screenshot("python.png")

	    # 5. Get the full page source HTML and convert it to Markdown
	    # You can configure markdownify to ignore certain tags if needed (e.g., strip=['script', 'style'])
	    html_content = driver.page_source
	    markdown_text = md(html_content, heading_style="ATX", strip=['script', 'style', 'noscript'])

	    # 6. Save the Markdown text to a file
	    filename = "python.md"
	    with open(filename, "w", encoding="utf-8") as file:
	        file.write(markdown_text)
	finally:
	    # quit() ends the whole WebDriver session; close() would only close the
	    # current window and could leave the driver process running.
	    driver.quit()