Created
September 28, 2025 09:07
-
-
Save vbresan/38c9c0e682b746d5b294cb732a2810a0 to your computer and use it in GitHub Desktop.
Prints which reddit threads, listed in an OPML file, are archived.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Usage: python list-archived.py [filename] | |
| Prints which reddit threads, listed in an OPML file, are archived. | |
| """ | |
| import requests | |
| import sys | |
| import xml.etree.ElementTree as ET | |
def get_request_url(url: str) -> str:
    """Return the reddit JSON endpoint URL for a thread URL.

    The ``old.reddit.com`` host is rewritten to ``www.reddit.com``, everything
    after the post ID (title slug, query string, fragment) is dropped, and
    ``.json`` is appended.

    Args:
        url: A reddit thread URL containing a ``/comments/<post id>`` segment.
            URLs without a ``/comments/`` segment are not supported and yield
            an unusable result.

    Returns:
        The URL truncated right after the post ID, with ``.json`` appended.
    """
    url = url.replace("https://old.reddit.com", "https://www.reddit.com")
    post_start = url.find("/comments/") + len("/comments/")
    # The post ID ends at the first path, query or fragment delimiter. When
    # none follows (URL ends right after the ID), keep the whole string rather
    # than letting a -1 from str.find() chop off the ID's last character.
    delimiters = [
        index
        for index in (url.find(sep, post_start) for sep in "/?#")
        if index != -1
    ]
    post_end = min(delimiters, default=len(url))
    return url[:post_end] + ".json"
def is_thread_archived(url: str) -> bool:
    """Report whether the reddit thread behind a JSON endpoint is archived.

    Args:
        url: The thread's ``.json`` endpoint URL.

    Returns:
        The value of the ``archived`` field of the first child of the first
        listing in the response. ``False`` when the response status is not
        200, when the body is not JSON of the expected shape, or when the
        field is absent.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            network failures, including the timeout set below.
    """
    headers = {"User-Agent": "list-archived/0.1"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        return False

    try:
        post = response.json()[0]["data"]["children"][0]["data"]
        archived = post.get("archived", False)
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # A 200 response with an unexpected body is treated like any other
        # unusable response: the thread is reported as not archived.
        return False
    return bool(archived)
if __name__ == "__main__":
    # Script entry point: print the title of every archived thread listed in
    # the OPML file named by the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("Usage: python list-archived.py [filename]")

    root = ET.parse(sys.argv[1]).getroot()
    for outline in root.iter("outline"):
        html_url = outline.get("htmlUrl")
        if html_url is None:
            # Outlines without a link (e.g. grouping/folder entries) have no
            # thread to check.
            continue
        if is_thread_archived(get_request_url(html_url)):
            # Fall back to the URL when the outline carries no title.
            print(outline.get("text", html_url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment