Skip to content

Instantly share code, notes, and snippets.

@Wowfunhappy
Last active June 27, 2025 16:40
Show Gist options
  • Select an option

  • Save Wowfunhappy/f0a563688948e1076f171b48486cc61d to your computer and use it in GitHub Desktop.

Select an option

Save Wowfunhappy/f0a563688948e1076f171b48486cc61d to your computer and use it in GitHub Desktop.
Download OS X Mavericks Help Viewer Content
#!/usr/bin/env python3
"""
This script downloads and saves an offline copy of any Mavericks (10.9) Apple Help documentation.
It creates a directory structure identical to the original help system.
"""
import os
import re
import json
import requests
from urllib.parse import urljoin, urlparse, urlunparse
from pathlib import Path
import shutil
import argparse
class MavericksHelpDownloader:
    """Save an offline copy of one app's Mavericks (10.9) Apple Help book.

    The downloader fetches ``index.html``, the shared Flamingo resources
    (CSS/JS/images), the per-language JSON/index files and every HTML topic
    listed in each language's ``navigation.json``, and stores them under
    ``output_dir`` using the same relative layout as the online help.

    Attributes:
        help_url: The help URL as given, without a trailing slash.
        output_dir: ``Path`` of the local mirror root.
        app_name: First path segment of the URL, or ``'unknown'``.
        base_url: ``help_url`` plus a trailing slash; relative downloads are
            resolved against it.
        session: ``requests.Session`` used for every request.
        downloaded_files: URLs (as passed to ``download_file``) already tried.
        all_languages: ``*.lproj`` directory names probed for content.
    """

    def __init__(self, help_url, output_dir):
        """Record the source URL and target directory and open an HTTP session.

        Args:
            help_url: Help book URL, e.g.
                ``https://help.apple.com/appname/mac/10.9/help/``.
            output_dir: Directory (str or path-like) that receives the mirror.
        """
        self.help_url = help_url.rstrip('/')
        self.output_dir = Path(output_dir)
        # Extract app name from URL (e.g., 'textedit' or 'keychainaccess')
        self.app_name = self._app_name_from_url(help_url)
        # Use the provided URL as base URL (already includes /help/)
        self.base_url = self.help_url + '/'
        self.session = requests.Session()
        self.downloaded_files = set()
        # List of all possible language directories
        self.all_languages = [
            'ar.lproj', 'ca.lproj', 'cs.lproj', 'da.lproj', 'de.lproj', 'el.lproj',
            'en.lproj', 'es.lproj', 'fi.lproj', 'fr.lproj', 'he.lproj', 'hr.lproj',
            'hu.lproj', 'id.lproj', 'it.lproj', 'ja.lproj', 'ko.lproj', 'ms.lproj',
            'nl.lproj', 'no.lproj', 'pl.lproj', 'pt_PT.lproj', 'pt.lproj', 'ro.lproj',
            'ru.lproj', 'sk.lproj', 'sv.lproj', 'th.lproj', 'tr.lproj', 'uk.lproj',
            'vi.lproj', 'zh_CN.lproj', 'zh_TW.lproj'
        ]
        print(f"App name: {self.app_name}")
        print(f"Base URL: {self.base_url}")

    @staticmethod
    def _app_name_from_url(help_url):
        """Return the first non-empty path segment of ``help_url``.

        Falls back to ``'unknown'`` when the URL has no path segments.
        Empty segments are dropped first because ``''.split('/')`` yields
        ``['']``, which would otherwise produce an empty app name.
        """
        segments = [part for part in urlparse(help_url).path.split('/') if part]
        return segments[0] if segments else 'unknown'

    def create_directory_structure(self):
        """Create the output root, every language folder and the image folder."""
        print("Creating directory structure...")
        os.makedirs(self.output_dir, exist_ok=True)
        for lang in self.all_languages:
            os.makedirs(self.output_dir / lang, exist_ok=True)
        os.makedirs(self.output_dir / 'resources' / 'flamingo' / '6' / 'images', exist_ok=True)

    def download_file(self, url, local_path):
        """Download ``url`` once and write the response body to ``local_path``.

        Args:
            url: Absolute ``http(s)`` URL, a server-absolute path starting
                with ``/``, or a path relative to ``self.base_url``.
            local_path: Destination file (str or ``Path``); its parent
                directory is created if needed.

        Returns:
            The downloaded bytes, or ``None`` when ``url`` was already
            attempted or the request/write failed (the error is printed).
        """
        # Each URL is attempted at most once, whether or not it succeeded.
        if url in self.downloaded_files:
            return None
        self.downloaded_files.add(url)

        if url.startswith('http'):
            full_url = url
        elif url.startswith('/'):
            # Server-absolute path: keep only scheme and host of the base URL.
            parsed_base = urlparse(self.base_url)
            full_url = urlunparse((parsed_base.scheme, parsed_base.netloc, url, '', '', ''))
        else:
            full_url = urljoin(self.base_url, url)

        local_path_str = str(local_path)
        # Path.parent is '.' for a bare file name, so this also works when the
        # target has no directory component (os.makedirs('') would raise).
        Path(local_path_str).parent.mkdir(parents=True, exist_ok=True)

        # Best-effort mirror: callers probe many optional files, so any
        # failure is reported and turned into a None result instead of raised.
        try:
            print(f"Downloading {full_url} -> {local_path_str}")
            response = self.session.get(full_url, timeout=30)
            response.raise_for_status()
            with open(local_path_str, 'wb') as f:
                f.write(response.content)
            return response.content
        except Exception as e:
            print(f"Error downloading {full_url}: {e}")
            return None

    def download_main_html(self):
        """Download ``index.html`` and point its resource links at local copies."""
        print("\nDownloading main HTML file...")
        index_path = self.output_dir / "index.html"
        self.download_file("index.html", index_path)
        self.update_resource_paths(index_path)

    def download_resource_files(self):
        """Download the shared Flamingo CSS/JS/images and candidate media files.

        A ``flamingo.js`` in the current working directory, if present, is
        copied instead of downloading the remote one.
        """
        print("\nDownloading resource files...")
        remote_root = "/Library/Documentation/Resources/Flamingo/6/"
        flamingo_dir = self.output_dir / "resources" / "flamingo" / "6"

        self.download_file(f"{remote_root}flamingo.css", flamingo_dir / "flamingo.css")
        self.download_file(f"{remote_root}jquery.js", flamingo_dir / "jquery.js")

        # Use provided flamingo.js or download it
        if os.path.exists("flamingo.js"):
            shutil.copy("flamingo.js", flamingo_dir / "flamingo.js")
            print(f"Copied local flamingo.js to {flamingo_dir / 'flamingo.js'}")
        else:
            self.download_file(f"{remote_root}flamingo.js", flamingo_dir / "flamingo.js")

        # Create required image directories
        os.makedirs(flamingo_dir / "images" / "red", exist_ok=True)
        os.makedirs(flamingo_dir / "images" / "tangerine", exist_ok=True)
        os.makedirs(self.output_dir / "media", exist_ok=True)

        # Image paths relative to the Flamingo "images" folder.
        # NOTE(review): the "[email protected]" entries look like file names
        # (presumably "<name>@2x.png" variants) that were mangled by e-mail
        # obfuscation when this script was copied from a web page. The real
        # names cannot be recovered from this file, so they are kept verbatim;
        # restore them from the original script. Until then these requests
        # are expected to fail and are only logged by download_file.
        image_names = [
            # Base images
            "[email protected]",
            "disclosure-open.png",
            "[email protected]",
            "lightbox-close.png",
            "[email protected]",
            "lightbox-close-hover.png",
            "[email protected]",
            # Red theme images
            "red/rightTOCarrow.png",
            "red/rightTOCarrow_blue.png",
            # Tangerine theme images
            "tangerine/[email protected]",
        ]
        for name in image_names:
            self.download_file(f"{remote_root}images/{name}", flamingo_dir / "images" / name)

        # Media files are guesses; missing ones just log an error.
        for media_file in self.find_media_files():
            self.download_file(media_file, self.output_dir / media_file)

    def find_media_files(self):
        """Return candidate icon/media paths derived from the app name.

        Nothing is checked against the server here; the list contains each
        guessed icon name both at the root and under ``media/``, followed by
        the fixed entry ``media/xicntxte.png``.
        """
        # Common patterns for app icons
        possible_icons = [
            f"{self.app_name[:4]}ic.png",  # e.g., txteic.png for textedit
            f"{self.app_name[:5]}ic.png",  # e.g., keychic.png for keychainaccess
            f"xicn{self.app_name[:4]}.png",  # e.g., xicntxte.png
            f"{self.app_name}.png",  # e.g., keychainaccess.png
        ]
        media_files = []
        for icon in possible_icons:
            media_files.extend((icon, f"media/{icon}"))
        media_files.append("media/xicntxte.png")  # Common pattern
        return media_files

    def _fetch_app_icon(self, lang):
        """Download the icon named in ``lang``'s navigation.json as ``icon.png``.

        Must run before the icon reference in that navigation.json is
        rewritten, because the original icon path is read from it.

        Returns:
            ``True`` when the icon was downloaded and copied to
            ``<output_dir>/icon.png``, otherwise ``False``.
        """
        nav_json_path = self.output_dir / lang / "navigation.json"
        if not nav_json_path.exists():
            return False
        with open(nav_json_path, 'r', encoding='utf-8') as f:
            nav_text = f.read()

        icon_match = re.search(r'"icon":"(?:\.\.\/)?([^"]+\.png)"', nav_text)
        if not icon_match:
            return False
        # Remove escaped forward slashes from JSON
        original_icon = icon_match.group(1).replace(r'\/', '/')

        if "../" in original_icon:
            # Icon lives in the parent (help root) directory.
            label = os.path.basename(original_icon)
            remote, local = label, self.output_dir / label
        else:
            # Icon lives in the language directory or a subfolder of it
            # (e.g., Art/xicnimov.png).
            label = original_icon
            remote, local = f"{lang}/{original_icon}", self.output_dir / lang / original_icon

        if not self.download_file(remote, local) or not os.path.exists(local):
            return False
        shutil.copy(local, self.output_dir / "icon.png")
        print(f"Found and copied icon: {label}")
        return True

    def download_locale_files(self):
        """Download ``locale-list.json`` and the per-language JSON/index files.

        The app icon is taken from ``en.lproj/navigation.json``; every
        language's ``navigation.json`` is rewritten to reference
        ``icon.png``, and the icon is copied into every language folder.

        Returns:
            The bytes of ``locale-list.json``, or ``None`` if not downloaded.
        """
        print("\nDownloading locale files...")
        locale_list_content = self.download_file("locale-list.json",
                                                 self.output_dir / "locale-list.json")
        icon_found = False
        for lang in self.all_languages:
            print(f"\nDownloading {lang} files...")
            lang_dir = self.output_dir / lang
            for name in ("locale-info.json", "navigation.json",
                         "search.json", "search.helpindex"):
                self.download_file(f"{lang}/{name}", lang_dir / name)

            # The icon is looked up in the English navigation data only.
            if not icon_found and lang == 'en.lproj':
                icon_found = self._fetch_app_icon(lang)

            # Fix icon paths in navigation.json for each language
            nav_json_path = lang_dir / "navigation.json"
            if nav_json_path.exists():
                with open(nav_json_path, 'r', encoding='utf-8') as f:
                    nav_data = f.read()
                nav_data = re.sub(r'"icon":"[^"]+\.png"', '"icon":"icon.png"', nav_data)
                with open(nav_json_path, 'w', encoding='utf-8') as f:
                    f.write(nav_data)

        # Copy the icon after the loop so languages listed before en.lproj
        # (whose navigation.json already points at icon.png) receive it too.
        shared_icon = self.output_dir / "icon.png"
        if icon_found and os.path.exists(shared_icon):
            for lang in self.all_languages:
                os.makedirs(self.output_dir / lang, exist_ok=True)
                shutil.copy(shared_icon, self.output_dir / lang / "icon.png")
        return locale_list_content

    @staticmethod
    def _collect_html_hrefs(nav_data):
        """Return every string ``href`` ending in ``.html`` nested in ``nav_data``.

        Walks dicts and lists at any depth; non-string ``href`` values are
        ignored instead of raising.
        """
        hrefs = []
        pending = [nav_data]
        while pending:
            node = pending.pop()
            if isinstance(node, dict):
                href = node.get('href')
                if isinstance(href, str) and href.endswith('.html'):
                    hrefs.append(href)
                pending.extend(node.values())
            elif isinstance(node, list):
                pending.extend(node)
        return hrefs

    def download_html_files(self):
        """Download every HTML topic listed in each language's navigation.json.

        Languages without a readable ``navigation.json`` are skipped. Images
        referenced by the ``en.lproj`` topics are downloaded once and copied
        into all other language folders.
        """
        print("\nDownloading HTML content files...")
        for lang in self.all_languages:
            print(f"\nDownloading {lang} HTML files...")
            nav_json_path = self.output_dir / lang / "navigation.json"
            if not nav_json_path.exists():
                print(f"Skipping {lang}: navigation.json not found")
                continue
            try:
                with open(nav_json_path, 'r', encoding='utf-8') as f:
                    nav_data = json.load(f)
            except (OSError, ValueError):
                # ValueError covers both JSON and text-decoding errors.
                print(f"Skipping {lang}: Could not parse navigation.json")
                continue

            # Remove duplicates and sort
            html_files = sorted(set(self._collect_html_hrefs(nav_data)))

            # Download special files
            for file in ("searchable-landing.html", "offline-message.html"):
                self.download_file(f"{lang}/{file}", self.output_dir / lang / file)

            # Download each HTML file and collect image references
            image_files = set()
            for html_file in html_files:
                html_path = self.output_dir / lang / html_file
                self.download_file(f"{lang}/{html_file}", html_path)
                # Images are collected from en.lproj only to avoid duplicates;
                # this happens before update_html_paths rewrites the file.
                if lang == 'en.lproj' and html_path.exists():
                    with open(html_path, 'r', encoding='utf-8') as f:
                        html_content = f.read()
                    img_matches = re.findall(r'src="([^"]+\.(?:png|jpg|jpeg|gif))"',
                                             html_content, re.IGNORECASE)
                    for img in img_matches:
                        # Skip absolute URLs, icon.png and parent-directory references
                        if not img.startswith('http') and img != 'icon.png' and not img.startswith('../'):
                            image_files.add(img)
                # Fix paths in the HTML file
                self.update_html_paths(html_path)

            # Download content images (only once from en.lproj)
            if lang == 'en.lproj' and image_files:
                print("\nDownloading content images...")
                for img_file in sorted(image_files):
                    local_img_path = os.path.join(str(self.output_dir), lang, img_file)
                    self.download_file(f"{lang}/{img_file}", local_img_path)
                    if not os.path.exists(local_img_path):
                        continue
                    # Copy images to all other language directories
                    for other_lang in self.all_languages:
                        if other_lang == lang:
                            continue
                        other_img_path = os.path.join(str(self.output_dir), other_lang, img_file)
                        os.makedirs(os.path.dirname(other_img_path), exist_ok=True)
                        shutil.copy(local_img_path, other_img_path)

    def update_resource_paths(self, file_path):
        """Rewrite Flamingo resource URLs in ``file_path`` to local paths.

        Does nothing when the file does not exist.
        """
        if not os.path.exists(file_path):
            return
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        content = content.replace('/Library/Documentation/Resources/Flamingo/6/',
                                  'resources/flamingo/6/')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Updated resource paths in {file_path}")

    def update_html_paths(self, file_path):
        """Point app-icon image references in ``file_path`` at ``icon.png``.

        Rewrites ``src="../*.png"`` attributes and the ``src`` of ``<img>``
        tags whose ``class="topicIcon"`` attribute precedes it. Does nothing
        when the file does not exist.
        """
        if not os.path.exists(file_path):
            return
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Only replace the app icon references, not all images
        # (icon references look like ../txteic.png)
        content = re.sub(r'src="\.\.\/[^"]+\.png"', 'src="icon.png"', content)
        # Also handle img tags with class="topicIcon" which are usually the app icons
        content = re.sub(r'(<img[^>]*class="topicIcon"[^>]*src=")[^"]+(")', r'\1icon.png\2', content)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Updated HTML paths in {file_path}")

    def run(self):
        """Run all download steps in order and print where the result is."""
        print(f"Starting {self.app_name} help documentation download...\n")
        print(f"URL: {self.help_url}\n")
        self.create_directory_structure()
        self.download_main_html()
        self.download_resource_files()
        # Locale files must come before HTML files: the topic list is read
        # from each language's downloaded navigation.json.
        self.download_locale_files()
        self.download_html_files()
        print("\nDownload complete! The offline documentation is available at:")
        print(f"{self.output_dir}/index.html")
if __name__ == "__main__":
    # Command-line entry point: parse the help URL and optional output
    # directory, then run the downloader.
    parser = argparse.ArgumentParser(
        description="Download Mavericks (10.9) Apple help documentation for offline use",
        epilog="Example: %(prog)s https://help.apple.com/keychainaccess/mac/10.9/help/ -o keychain_help"
    )
    parser.add_argument("url",
                        help="The help URL (e.g., https://help.apple.com/textedit/mac/10.9/help/)")
    parser.add_argument("--output", "-o", default=None,
                        help="Output directory (default: app_name_help)")
    args = parser.parse_args()

    # Determine output directory
    if args.output is None:
        # Derive "<app>_help" from the first non-empty URL path segment.
        # Empty segments are dropped because ''.split('/') yields [''],
        # which would otherwise produce the directory name "_help".
        path_parts = [part for part in urlparse(args.url).path.split('/') if part]
        args.output = f"{path_parts[0]}_help" if path_parts else "app_help"

    downloader = MavericksHelpDownloader(args.url, args.output)
    downloader.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment