A Python script to pull video files (GIF, WebM, MP4) from a website. Files that already exist locally are not re-downloaded, and cookies can be used to authenticate.

# movie grabber in python
# This script pulls videos from websites, using cookies for authentication.
# Usage:
# 1. Have the required libraries installed:
#    pip install requests beautifulsoup4
# 2. Put your cookies in a file "cookies.txt", one per line, in the
#    pipe-separated host|name|value format that sqlite3 prints by default:
#    host1|cookie_name1|cookie_value1
#    host2|cookie_name2|cookie_value2
# 3. Run the script and enter the website URL when prompted.
# Note1: This script is designed to work with websites that require cookies for access.
# Note2: for "reasons" it also checks ../cookies.txt for cookies, so you can place your cookies file there.
#
# Example of how to extract cookies from Firefox:
# find ~/.mozilla/firefox -name "cookies.sqlite"
#
# sqlite3 ~/.mozilla/firefox/<profile>/cookies.sqlite "SELECT host, name, value FROM moz_cookies WHERE host LIKE '%4chan.org%'"
#
# Save the output in a file (e.g., cookies.txt); each line looks like:
#
# .4chan.org|cookie_name1|cookie_value1
# .4chan.org|cookie_name2|cookie_value2
#
# Just copy the results from sqlite3 into the file as-is; my code will parse it.
# (See also the optional export_firefox_cookies() sketch below.)
#
# References, my biggest thank yous:
# https://stackoverflow.com/questions/44187490/downloading-files-from-a-website-using-python#44189025
# https://webbrowsertools.com/useragent/?method=normal&verbose=false
import os
import subprocess
import time
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def load_cookies(cookie_file):
    """Load cookies from a file and return them as a dictionary."""
    cookies = {}
    if os.path.exists(cookie_file):
        with open(cookie_file, "r") as f:
            for line in f:
                parts = line.strip().split("|")
                if len(parts) >= 3:  # Ensure the line has at least 3 parts (host|name|value)
                    name = parts[1]   # Second field is the cookie name
                    value = parts[2]  # Third field is the cookie value
                    cookies[name] = value
        print(f"Loaded cookies from {cookie_file}")
    else:
        print(f"Cookie file {cookie_file} not found. Proceeding without cookies.")
    return cookies
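
# Optional: a minimal sketch of exporting Firefox cookies straight to cookies.txt
# with Python's built-in sqlite3 module, instead of the sqlite3 CLI shown above.
# This helper is an illustration, not part of the original workflow: its name,
# parameters, and defaults are my own assumptions. Close Firefox first (or copy
# the database), since cookies.sqlite may be locked while the browser runs.
def export_firefox_cookies(db_path, host_filter, out_file="cookies.txt"):
    """Write host|name|value lines for cookies whose host matches host_filter."""
    import sqlite3  # stdlib; local import since this helper is optional
    con = sqlite3.connect(db_path)
    try:
        rows = con.execute(
            "SELECT host, name, value FROM moz_cookies WHERE host LIKE ?",
            (f"%{host_filter}%",),
        )
        with open(out_file, "w") as f:
            for host, name, value in rows:
                f.write(f"{host}|{name}|{value}\n")
    finally:
        con.close()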

def get_video_links(url, cookies=None):
    """Fetch a page and return a set of absolute links to video files."""
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": url
    }
    response = requests.get(url, headers=headers, cookies=cookies)
    response.raise_for_status()  # Ensure the request was successful
    soup = BeautifulSoup(response.text, "html.parser")
    video_extensions = (".mp4", ".webm", ".gif")
    video_links = set()
    # Collect candidate URLs from link and media tags
    for tag in soup.find_all(["a", "source", "video", "img"]):
        src = tag.get("href") or tag.get("src")
        if src and src.endswith(video_extensions):
            video_links.add(urljoin(url, src))
    return video_links
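
# Note: endswith() misses URLs that carry query strings (e.g. "clip.webm?download=1").
# A hedged variant that checks only the URL's path component could look like this;
# the helper name is my own and it is not wired into the script above.
def has_video_extension(src, extensions=(".mp4", ".webm", ".gif")):
    """Return True if the path component of src ends in a video extension."""
    from urllib.parse import urlparse  # stdlib; local import for the optional helper
    return urlparse(src).path.lower().endswith(extensions)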

def download_videos(video_links, output_dir="videos"):
    """Download each link into output_dir, skipping files that already exist."""
    os.makedirs(output_dir, exist_ok=True)
    for link in video_links:
        filename = os.path.join(output_dir, os.path.basename(link))
        if os.path.exists(filename):
            print(f"File already exists: {filename}. Skipping download.")
            continue  # Skip downloading if the file already exists
        print(f"Downloading: {link} -> {filename}")
        # An argument list avoids the shell-quoting pitfalls of os.system()
        subprocess.run(["wget", "-c", "--show-progress", "-O", filename, link])
        time.sleep(2)  # Slow down requests to avoid 429
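
# Alternative: if wget is not installed, a plain-requests fallback could look like
# the sketch below. This is not the script's original method: it streams to disk
# in chunks but does not resume partial downloads the way "wget -c" does, and the
# function name and chunk size are my own choices.
def download_with_requests(link, filename, cookies=None):
    """Stream one file to disk using requests instead of wget."""
    with requests.get(link, cookies=cookies, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)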

if __name__ == "__main__":
    site_url = input("Enter the website URL: ").strip()
    # Check for cookies.txt next to the script first, then one directory up (see Note2)
    cookie_file = "cookies.txt" if os.path.exists("cookies.txt") else "../cookies.txt"
    cookies = load_cookies(cookie_file)
    videos = get_video_links(site_url, cookies=cookies)
    if videos:
        print(f"Found {len(videos)} videos. Starting download...")
        download_videos(videos)
    else:
        print("No videos found.")