Skip to content

Instantly share code, notes, and snippets.

@sin3point14
Created October 10, 2024 18:58
Show Gist options
  • Select an option

  • Save sin3point14/fcd1603f098ece397c7ecca959215339 to your computer and use it in GitHub Desktop.

Select an option

Save sin3point14/fcd1603f098ece397c7ecca959215339 to your computer and use it in GitHub Desktop.
Download a Reddit account's content. Saves comments and posts, but doesn't download all the assets needed to render the pages properly.
import sys
import os
import requests
from bs4 import BeautifulSoup
def dump_stuff(base, output_dir):
    """Save every page of a paginated listing as numbered HTML files.

    Starting at ``base``, each page is fetched and written to
    ``<output_dir>/<i>.html`` where ``i`` counts up from 0. After saving a
    page, the crawl follows that page's ``rel="nofollow next"`` anchor and
    stops as soon as a page has no such anchor.

    Args:
        base: URL of the first listing page.
        output_dir: Directory that receives the HTML files; created if it
            does not exist yet.

    Raises:
        requests.HTTPError: If a page answers with a 4xx/5xx status. Failing
            loudly avoids saving an error page and silently ending the crawl
            as though the listing were complete.
        requests.RequestException: On connection errors or timeouts.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urljoin

    os.makedirs(output_dir, exist_ok=True)
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    url = base
    i = 0
    while True:
        page_path = f"{output_dir}/{i}.html"
        # Report the page actually being fetched, not just the start URL.
        print("Dumping", url, "to", page_path)
        # Without a timeout a stalled connection would block forever.
        response = requests.get(
            url, headers={"User-Agent": user_agent}, timeout=30
        )
        response.raise_for_status()
        with open(page_path, "w", encoding="utf-8") as f:
            f.write(response.text)
        soup = BeautifulSoup(response.text, "html.parser")
        # next url should be in an anchor tag like
        # <a href="something" rel="nofollow next">next ›</a>
        next_a = soup.find("a", rel="nofollow next")
        next_href = next_a.get("href") if next_a is not None else None
        if not next_href:
            break
        # Resolve against the current page in case the href is relative.
        url = urljoin(url, next_href)
        i += 1
if __name__ == "__main__":
    # Exactly one positional argument is expected: the account name.
    args = sys.argv[1:]
    if len(args) != 1:
        print("Usage: python download_reddit_account.py <username>")
        sys.exit(1)

    (user,) = args
    base = f"https://old.reddit.com/user/{user}"
    output_dir = f"output/{user}"
    os.makedirs(output_dir, exist_ok=True)
    print("Dumping", base, "to", output_dir)

    # (local sub-directory, listing path on the site), dumped in this order:
    # comments first, then submitted posts.
    listings = (
        ("comments", "comments"),
        ("submited", "submitted"),
    )
    for subdir, listing in listings:
        dump_stuff(f"{base}/{listing}", f"{output_dir}/{subdir}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment