Created
July 29, 2025 13:53
-
-
Save CodeAsm/24fa8b113e79e7c82727efaa98f65925 to your computer and use it in GitHub Desktop.
a Python script to pull video files like GIF, WebM and MP4 from a website. If a file has already been downloaded, it won't be re-downloaded. Can use cookies to authenticate.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # movie grabber in python | |
| # This script pulls videos from websites using cookies for authentication. | |
| # Usage: | |
| # 1. Have the required libraries installed: | |
| # pip install requests beautifulsoup4 | |
| # 2. Get your cookies in a file "cookies.txt": | |
| # cookie_name1|cookie_value1 | |
| # cookie_name2|cookie_value2 | |
| # 3. Run the script and provide the cookies.txt and website URL when prompted. | |
| # Note1: This script is designed to work with websites that require cookies for access. | |
| # Note2: for "reasons" it also checks ../cookies.txt for cookies, so you can place your cookies file there. | |
| # | |
| # Example how to extract cookies from Firefox: | |
| # find ~/.mozilla/firefox -name "cookies.sqlite" | |
| # | |
| # sqlite3 ~/.mozilla/firefox/<profile>/cookies.sqlite "SELECT host, name, value FROM moz_cookies WHERE host LIKE '%4chan.org%'" | |
| # | |
| # Save the cookies in a file (e.g., cookies.txt) in the format: | |
| # | |
| # cookie_name1=cookie_value1 | |
| # cookie_name2=cookie_value2 | |
| # or just copy the results from sqlite3 into the file, my code will parse it. | |
| # | |
| # References, my biggest thank yous: | |
| # https://stackoverflow.com/questions/44187490/downloading-files-from-a-website-using-python#44189025 | |
| # https://webbrowsertools.com/useragent/?method=normal&verbose=false | |
import os
import re
import subprocess
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def load_cookies(cookie_file):
    """Load cookies from a file and return them as a dict of name -> value.

    Accepts both line formats described in the header notes:
      * sqlite3 output:  host|name|value   (3+ fields; name is field 2)
      * simple pairs:    name|value        (2 fields)
    Lines matching neither format are ignored. Previously the 2-field
    format, although documented in the usage notes, was silently rejected
    by the >= 3 parts check.

    If cookie_file does not exist, returns an empty dict and proceeds
    without cookies.
    """
    cookies = {}
    if os.path.exists(cookie_file):
        with open(cookie_file, "r") as f:
            for line in f:
                parts = line.strip().split("|")
                if len(parts) >= 3:
                    # sqlite3 "host|name|value" rows: name is the 2nd field
                    cookies[parts[1]] = parts[2]
                elif len(parts) == 2:
                    # plain "name|value" pairs from the documented format
                    cookies[parts[0]] = parts[1]
        print(f"Loaded cookies from {cookie_file}")
    else:
        print(f"Cookie file {cookie_file} not found. Proceeding without cookies.")
    return cookies
def get_video_links(url, cookies=None, timeout=30):
    """Fetch *url* and return a set of absolute video URLs found on the page.

    Scans <a>, <source>, <video> and <img> tags for href/src attributes
    ending in .mp4, .webm or .gif. The extension check is case-insensitive
    and ignores any query string or fragment, so "clip.MP4?token=x" is
    matched too.

    Args:
        url: page to scan; also sent as the Referer header.
        cookies: optional dict of cookies for authenticated sites.
        timeout: seconds before the HTTP request is abandoned (the original
            code had no timeout, so a stalled server could hang forever).

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": url
    }
    response = requests.get(url, headers=headers, cookies=cookies, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    soup = BeautifulSoup(response.text, "html.parser")
    video_extensions = (".mp4", ".webm", ".gif")
    video_links = set()
    for tag in soup.find_all(["a", "source", "video", "img"]):
        src = tag.get("href") or tag.get("src")
        if not src:
            continue
        # Compare against the bare path: drop ?query and #fragment, fold case.
        path = src.split("#", 1)[0].split("?", 1)[0]
        if path.lower().endswith(video_extensions):
            video_links.add(urljoin(url, src))
    return video_links
def download_videos(video_links, output_dir="videos"):
    """Download each URL in *video_links* into *output_dir*.

    A file whose basename already exists locally is skipped, so re-running
    the script never re-downloads anything. Downloads go through wget with
    -c so an interrupted transfer resumes where it left off.

    Fixes two defects in the original:
      * the f-strings printed/used the literal "(unknown)" instead of the
        actual {filename}, so every download was written to a file named
        "(unknown)" and the already-exists check never matched on re-runs;
      * os.system() interpolated the URL into a shell string — a crafted
        URL could inject shell commands. subprocess.run with an argument
        list (shell=False) is immune to that.
    """
    os.makedirs(output_dir, exist_ok=True)
    for link in video_links:
        filename = os.path.join(output_dir, os.path.basename(link))
        if os.path.exists(filename):
            print(f"File already exists: {filename}. Skipping download.")
            continue  # Skip downloading if the file already exists
        print(f"Downloading: {link} -> {filename}")
        subprocess.run(
            ["wget", "-c", "--show-progress", "-O", filename, link],
            check=False,  # a failed download shouldn't abort the whole batch
        )
        time.sleep(2)  # Slow down requests to avoid 429
if __name__ == "__main__":
    # Ask for the page to scan; cookies come from the fixed location one
    # directory up (see the header notes for why ../cookies.txt).
    site_url = input("Enter the website URL: ").strip()
    cookies = load_cookies("../cookies.txt")
    found = get_video_links(site_url, cookies=cookies)
    if not found:
        print("No videos found.")
    else:
        print(f"Found {len(found)} videos. Starting download...")
        download_videos(found)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment