@sdhutchins
Created November 5, 2025 23:05
Convert MkDocs YAML Nav Section to URL List
import yaml
import requests
import os
import re

YAML_URL = "https://raw.githubusercontent.com/uabrc/uabrc.github.io/refs/heads/main/mkdocs.yml"
BASE_URL = "https://docs.rc.uab.edu/"
PREFIX = "uab_docs_urls"


def extract_paths(nav):
    """Recursively walk through nav entries and return list of .md paths."""
    paths = []
    for item in nav:
        if isinstance(item, dict):
            for _, value in item.items():
                if isinstance(value, list):
                    paths.extend(extract_paths(value))
                elif isinstance(value, str) and value.endswith(".md"):
                    paths.append(value)
        elif isinstance(item, str) and item.endswith(".md"):
            paths.append(item)
    return paths


def normalize_url(md_path):
    """Convert Markdown path to docs.rc.uab.edu URL."""
    clean = md_path.replace("index.md", "")
    return f"{BASE_URL}{clean.replace('.md', '')}"


def get_latest_version():
    """Find the latest version number from existing files."""
    existing = [f for f in os.listdir(".") if re.match(f"{PREFIX}_v\\d{{3}}\\.txt", f)]
    if not existing:
        return 0
    versions = [int(re.search(r"v(\d{3})", f).group(1)) for f in existing]
    return max(versions)


def main():
    # Fetch YAML
    response = requests.get(YAML_URL)
    response.raise_for_status()
    data = yaml.safe_load(response.text)

    # Extract URLs
    md_files = extract_paths(data.get("nav", []))
    urls = sorted(set(normalize_url(p) for p in md_files))

    # Versioning
    version = get_latest_version() + 1
    filename = f"{PREFIX}_v{version:03d}.txt"

    # Save current snapshot
    with open(filename, "w") as f:
        f.write("\n".join(urls))
    print(f"✅ Saved {len(urls)} URLs to {filename}")

    # Compare to previous version if it exists
    if version > 1:
        prev_file = f"{PREFIX}_v{version-1:03d}.txt"
        with open(prev_file) as f:
            prev_urls = set(line.strip() for line in f)
        new_urls = sorted(set(urls) - prev_urls)
        removed_urls = sorted(set(prev_urls) - set(urls))
        if new_urls or removed_urls:
            with open("uab_docs_url_changes.txt", "a") as log:
                log.write(f"\n=== Version {version:03d} ===\n")
                if new_urls:
                    log.write(f"Added ({len(new_urls)}):\n" + "\n".join(new_urls) + "\n")
                if removed_urls:
                    log.write(f"Removed ({len(removed_urls)}):\n" + "\n".join(removed_urls) + "\n")
            print("📝 Logged changes in uab_docs_url_changes.txt")
        else:
            print("No URL changes since last version.")


if __name__ == "__main__":
    main()
sdhutchins commented Nov 5, 2025

UAB Docs URL Tracker

This script fetches the latest mkdocs.yml from the UAB Research Computing documentation repository and extracts every Markdown page referenced in the nav section as a full URL (e.g., https://docs.rc.uab.edu/help/faq).
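For reference, here is a minimal sketch of how a nav entry maps to a URL, assuming the script's extract_paths() and normalize_url() are in scope (e.g., run in the same module). The nav fragment below is illustrative only, not copied from the real mkdocs.yml:

import yaml

# Hypothetical nav fragment; the real mkdocs.yml nav is much larger.
sample_nav = yaml.safe_load("""
nav:
  - Home: index.md
  - Help:
      - FAQ: help/faq.md
      - Support: help/support.md
""")["nav"]

print(extract_paths(sample_nav))     # ['index.md', 'help/faq.md', 'help/support.md']
print(normalize_url("help/faq.md"))  # https://docs.rc.uab.edu/help/faq
print(normalize_url("index.md"))     # https://docs.rc.uab.edu/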

Each run generates a new versioned snapshot file (uab_docs_urls_v001.txt, v002.txt, etc.) and logs added or removed URLs in uab_docs_url_changes.txt.
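If you want to diff two older snapshots by hand without generating a new version, a minimal sketch (the filenames below are placeholders for whichever versions exist on disk):

# Compare two existing snapshot files directly; the version numbers are placeholders.
with open("uab_docs_urls_v001.txt") as f:
    old_urls = set(line.strip() for line in f if line.strip())
with open("uab_docs_urls_v002.txt") as f:
    new_urls = set(line.strip() for line in f if line.strip())

print("Added:", sorted(new_urls - old_urls))
print("Removed:", sorted(old_urls - new_urls))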

To use, install dependencies with pip install pyyaml requests and run python extract_docs_urls_versioned.py.

This helps track documentation changes over time and identify new or missing pages in the official docs.
