Created
July 29, 2025 13:53
-
-
Save CodeAsm/24fa8b113e79e7c82727efaa98f65925 to your computer and use it in GitHub Desktop.
a Python script to pull video files like GIF, WebM and MP4 from a website. If a file has already been downloaded, it won't be re-downloaded. Can use cookies to authenticate.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # movie grabber in python | |
| # This script pulls videos from websites using cookies for authentication. | |
| # Usage: | |
| # 1. Have the required libraries installed: | |
| # pip install requests beautifulsoup4 | |
| # 2. Get your cookies in a file "cookies.txt": | |
| # cookie_name1|cookie_value1 | |
| # cookie_name2|cookie_value2 | |
| # 3. Run the script and provide the cookies.txt and website URL when prompted. | |
| # Note1: This script is designed to work with websites that require cookies for access. | |
| # Note2: for "reasons" it also checks ../cookies.txt for cookies, so you can place your cookies file there. | |
| # | |
| # Example how to extract cookies from Firefox: | |
| # find ~/.mozilla/firefox -name "cookies.sqlite" | |
| # | |
| # sqlite3 ~/.mozilla/firefox/<profile>/cookies.sqlite "SELECT host, name, value FROM moz_cookies WHERE host LIKE '%4chan.org%'" | |
| # | |
| # Save the cookies in a file (e.g., cookies.txt) in the format: | |
| # | |
| # cookie_name1=cookie_value1 | |
| # cookie_name2=cookie_value2 | |
| # or just copy the results from sqlite3 into the file, my code will parse it. | |
| # | |
| # References, my biggest thank yous: | |
| # https://stackoverflow.com/questions/44187490/downloading-files-from-a-website-using-python#44189025 | |
| # https://webbrowsertools.com/useragent/?method=normal&verbose=false | |
import os
import re
import subprocess
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def load_cookies(cookie_file):
    """Load cookies from a file and return them as a dict of name -> value.

    Accepts both line formats described in the header notes:
      * sqlite3 output:  host|name|value   (3+ fields; name is field 2)
      * simple pairs:    name|value        (2 fields)
    Lines matching neither format are ignored. Previously the 2-field
    format, although documented in the usage notes, was silently rejected
    by the >= 3 parts check.

    If cookie_file does not exist, returns an empty dict and proceeds
    without cookies.
    """
    cookies = {}
    if os.path.exists(cookie_file):
        with open(cookie_file, "r") as f:
            for line in f:
                parts = line.strip().split("|")
                if len(parts) >= 3:
                    # sqlite3 "host|name|value" rows: name is the 2nd field
                    cookies[parts[1]] = parts[2]
                elif len(parts) == 2:
                    # plain "name|value" pairs from the documented format
                    cookies[parts[0]] = parts[1]
        print(f"Loaded cookies from {cookie_file}")
    else:
        print(f"Cookie file {cookie_file} not found. Proceeding without cookies.")
    return cookies
def get_video_links(url, cookies=None, timeout=30):
    """Fetch *url* and return a set of absolute video URLs found on the page.

    Scans <a>, <source>, <video> and <img> tags for href/src attributes
    ending in .mp4, .webm or .gif. The extension check is case-insensitive
    and ignores any query string or fragment, so "clip.MP4?token=x" is
    matched too.

    Args:
        url: page to scan; also sent as the Referer header.
        cookies: optional dict of cookies for authenticated sites.
        timeout: seconds before the HTTP request is abandoned (the original
            code had no timeout, so a stalled server could hang forever).

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": url
    }
    response = requests.get(url, headers=headers, cookies=cookies, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    soup = BeautifulSoup(response.text, "html.parser")
    video_extensions = (".mp4", ".webm", ".gif")
    video_links = set()
    for tag in soup.find_all(["a", "source", "video", "img"]):
        src = tag.get("href") or tag.get("src")
        if not src:
            continue
        # Compare against the bare path: drop ?query and #fragment, fold case.
        path = src.split("#", 1)[0].split("?", 1)[0]
        if path.lower().endswith(video_extensions):
            video_links.add(urljoin(url, src))
    return video_links
def download_videos(video_links, output_dir="videos"):
    """Download each URL in *video_links* into *output_dir*.

    A file whose basename already exists locally is skipped, so re-running
    the script never re-downloads anything. Downloads go through wget with
    -c so an interrupted transfer resumes where it left off.

    Fixes two defects in the original:
      * the f-strings printed/used the literal "(unknown)" instead of the
        actual {filename}, so every download was written to a file named
        "(unknown)" and the already-exists check never matched on re-runs;
      * os.system() interpolated the URL into a shell string — a crafted
        URL could inject shell commands. subprocess.run with an argument
        list (shell=False) is immune to that.
    """
    os.makedirs(output_dir, exist_ok=True)
    for link in video_links:
        filename = os.path.join(output_dir, os.path.basename(link))
        if os.path.exists(filename):
            print(f"File already exists: {filename}. Skipping download.")
            continue  # Skip downloading if the file already exists
        print(f"Downloading: {link} -> {filename}")
        subprocess.run(
            ["wget", "-c", "--show-progress", "-O", filename, link],
            check=False,  # a failed download shouldn't abort the whole batch
        )
        time.sleep(2)  # Slow down requests to avoid 429
if __name__ == "__main__":
    # Ask for the page to scan; cookies come from the fixed location one
    # directory up (see the header notes for why ../cookies.txt).
    site_url = input("Enter the website URL: ").strip()
    cookies = load_cookies("../cookies.txt")
    found = get_video_links(site_url, cookies=cookies)
    if not found:
        print("No videos found.")
    else:
        print(f"Found {len(found)} videos. Starting download...")
        download_videos(found)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment