Created
October 10, 2024 18:58
-
-
Save sin3point14/fcd1603f098ece397c7ecca959215339 to your computer and use it in GitHub Desktop.
Download a Reddit account's content. Saves the comment and post listing pages as HTML, but does not download the assets (CSS, images, scripts) needed to render those pages properly.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import os | |
| import requests | |
| from bs4 import BeautifulSoup | |
def dump_stuff(base, output_dir, *, timeout=30):
    """Save every page of a paginated listing as numbered HTML files.

    Starting at ``base``, each page is fetched and written verbatim to
    ``{output_dir}/{i}.html`` with ``i`` counting up from 0. The crawl then
    follows the page's ``<a rel="nofollow next">`` link and stops at the
    first page that has no such link.

    Args:
        base: URL of the first listing page.
        output_dir: Directory that receives the pages; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Raises:
        requests.HTTPError: If a page is answered with a 4xx/5xx status.
            Raised before anything is written for that page, so an error
            page is never stored as if it were account content.
        requests.RequestException: On connection failures or timeouts.
    """
    os.makedirs(output_dir, exist_ok=True)
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    headers = {"User-Agent": user_agent}
    url = base
    i = 0
    while True:
        # Log the page actually being fetched, not just the starting URL.
        print("Dumping", url, "to", output_dir)
        response = requests.get(url, headers=headers, timeout=timeout)
        # An error response has no "next" link, so without this check the
        # crawl would end quietly with an incomplete dump.
        response.raise_for_status()
        with open(f"{output_dir}/{i}.html", "w", encoding="utf-8") as f:
            f.write(response.text)
        soup = BeautifulSoup(response.text, "html.parser")
        # The next url should be in an anchor tag like
        # <a href="something" rel="nofollow next">next ›</a>
        next_a = soup.find("a", rel="nofollow next")
        if next_a is None:
            break
        next_url = next_a.get("href")
        if not next_url:
            # A "next" anchor without a target gives us nowhere to go.
            break
        url = next_url
        i += 1
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the account name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python download_reddit_account.py <username>")
        sys.exit(1)
    (user,) = cli_args

    base = f"https://old.reddit.com/user/{user}"
    output_dir = f"output/{user}"
    os.makedirs(output_dir, exist_ok=True)
    print("Dumping", base, "to", output_dir)

    # (local folder name, listing path on the site), dumped in this order.
    # The folder spelling "submited" is the on-disk path and is kept as is.
    listings = (
        ("comments", "comments"),
        ("submited", "submitted"),
    )
    for folder, listing in listings:
        dump_stuff(f"{base}/{listing}", f"{output_dir}/{folder}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment