Created
November 5, 2025 23:05
-
-
Save sdhutchins/392302ff7e4d4a40331cd9a74d714072 to your computer and use it in GitHub Desktop.
Convert Mkdocs Yaml Nav Section to Urls List
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import yaml | |
| import requests | |
| import os | |
| import re | |
# Raw mkdocs.yml that main() downloads; its "nav" section is scanned for pages.
YAML_URL = "https://raw.githubusercontent.com/uabrc/uabrc.github.io/refs/heads/main/mkdocs.yml"
# Site root that normalize_url() prepends to every extracted page path.
BASE_URL = "https://docs.rc.uab.edu/"
# Filename stem of the versioned snapshot files (<PREFIX>_vNNN.txt).
PREFIX = "uab_docs_urls"
def extract_paths(nav):
    """Collect every ``.md`` path referenced by a MkDocs-style nav list.

    Each nav item is either a bare string (a page path) or a dict whose
    values are a page path or a nested list of further nav items. Nested
    lists are walked recursively; any value that is not a string ending
    in ``.md`` is skipped.

    Args:
        nav: List of nav items as produced by parsing the YAML ``nav`` key.

    Returns:
        List of ``.md`` paths in document order (duplicates preserved).
    """
    collected = []
    for entry in nav:
        # Bare string entry: an untitled page.
        if isinstance(entry, str):
            if entry.endswith(".md"):
                collected.append(entry)
            continue
        # Anything that is neither a string nor a mapping carries no pages.
        if not isinstance(entry, dict):
            continue
        for child in entry.values():
            if isinstance(child, list):
                # Nested section: descend and keep its pages in order.
                collected += extract_paths(child)
            elif isinstance(child, str) and child.endswith(".md"):
                collected.append(child)
    return collected
def normalize_url(md_path, base_url=None):
    """Convert a Markdown nav path into its published documentation URL.

    A path whose file name is exactly ``index.md`` maps to its directory
    (``cheaha/index.md`` -> ``<base>cheaha/``); any other ``.md`` path maps
    to the same path without the extension (``help/faq.md`` ->
    ``<base>help/faq``).

    Only the trailing file name / extension is stripped. Replacing every
    occurrence would corrupt paths such as ``data/reindex.md`` or
    ``notes.md.d/page.md``.

    Args:
        md_path: Page path relative to the docs root, as listed in the nav.
        base_url: Site root to prepend. Defaults to the module-level
            ``BASE_URL``.

    Returns:
        The full URL as a string.
    """
    if base_url is None:
        base_url = BASE_URL

    index_name = "index.md"
    extension = ".md"
    if md_path == index_name or md_path.endswith("/" + index_name):
        # Directory index page: keep the directory part including its slash.
        clean = md_path[: -len(index_name)]
    elif md_path.endswith(extension):
        clean = md_path[: -len(extension)]
    else:
        clean = md_path
    return f"{base_url}{clean}"
def get_latest_version(directory=".", prefix=None):
    """Return the highest snapshot version number found on disk.

    Snapshot files are named ``<prefix>_vNNN.txt``. The whole file name must
    match, so look-alikes such as ``<prefix>_v001.txt.bak`` are ignored, and
    the prefix is matched literally. Version numbers with more than three
    digits are accepted so numbering keeps advancing past 999.

    Args:
        directory: Directory to scan. Defaults to the current directory.
        prefix: Snapshot filename stem. Defaults to the module-level
            ``PREFIX``.

    Returns:
        The largest version number as an int, or 0 when no snapshot exists.
    """
    if prefix is None:
        prefix = PREFIX

    snapshot_name = re.compile(rf"{re.escape(prefix)}_v(\d{{3,}})\.txt")
    versions = []
    for name in os.listdir(directory):
        match = snapshot_name.fullmatch(name)
        if match:
            versions.append(int(match.group(1)))
    return max(versions, default=0)
def main():
    """Snapshot the documentation URLs and log differences from the last run.

    Steps:
      1. Download the YAML at ``YAML_URL`` and parse it.
      2. Turn every ``.md`` path under its ``nav`` key into a URL
         (sorted, de-duplicated).
      3. Write the URLs to the next ``<PREFIX>_vNNN.txt`` snapshot file in
         the current directory.
      4. If a previous snapshot exists, append the added/removed URLs to
         ``uab_docs_url_changes.txt``.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an HTTP error status.
    """
    # Fetch YAML. Without a timeout, requests waits indefinitely on a
    # stalled connection.
    response = requests.get(YAML_URL, timeout=30)
    response.raise_for_status()
    # An empty document parses to None; treat it like a config without nav.
    data = yaml.safe_load(response.text) or {}

    # Extract URLs. A bare "nav:" key parses to None, so fall back to [].
    md_files = extract_paths(data.get("nav") or [])
    urls = sorted({normalize_url(p) for p in md_files})

    # Versioning
    version = get_latest_version() + 1
    filename = f"{PREFIX}_v{version:03d}.txt"

    # Save current snapshot
    with open(filename, "w", encoding="utf-8") as f:
        f.write("\n".join(urls))
    print(f"✅ Saved {len(urls)} URLs to {filename}")

    # Nothing to compare against on the very first run.
    if version <= 1:
        return

    # Compare to previous version
    prev_file = f"{PREFIX}_v{version-1:03d}.txt"
    with open(prev_file, encoding="utf-8") as f:
        # Skip blank lines so they are not reported as a removed URL.
        prev_urls = {line.strip() for line in f if line.strip()}

    current_urls = set(urls)
    new_urls = sorted(current_urls - prev_urls)
    removed_urls = sorted(prev_urls - current_urls)

    if not (new_urls or removed_urls):
        print("No URL changes since last version.")
        return

    # Append so the log accumulates one section per changed version.
    with open("uab_docs_url_changes.txt", "a", encoding="utf-8") as log:
        log.write(f"\n=== Version {version:03d} ===\n")
        if new_urls:
            log.write(f"Added ({len(new_urls)}):\n" + "\n".join(new_urls) + "\n")
        if removed_urls:
            log.write(f"Removed ({len(removed_urls)}):\n" + "\n".join(removed_urls) + "\n")
    print("📝 Logged changes in uab_docs_url_changes.txt")
# Run the snapshot only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
UAB Docs URL Tracker
This script fetches the latest `mkdocs.yml` from the UAB Research Computing documentation site and extracts all Markdown-linked pages as full URLs (e.g., https://docs.rc.uab.edu/help/faq).
Each run generates a new versioned snapshot file (`uab_docs_urls_v001.txt`, `v002.txt`, etc.) and logs added or removed URLs in `uab_docs_url_changes.txt`.
To use, install dependencies with `pip install pyyaml requests` and run `python extract_docs_urls_versioned.py`.
This helps track documentation changes over time and identify new or missing pages in the official docs.