Last active
June 27, 2025 16:40
-
-
Save Wowfunhappy/f0a563688948e1076f171b48486cc61d to your computer and use it in GitHub Desktop.
Download OS X Mavericks Help Viewer Content
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| This script downloads and saves an offline copy of any Mavericks (10.9) Apple Help documentation. | |
| It creates a directory structure identical to the original help system. | |
| """ | |
| import os | |
| import re | |
| import json | |
| import requests | |
| from urllib.parse import urljoin, urlparse, urlunparse | |
| from pathlib import Path | |
| import shutil | |
| import argparse | |
class MavericksHelpDownloader:
    """Mirror one application's Mavericks (10.9) Apple Help book to disk.

    The downloader fetches ``index.html``, the shared "Flamingo" CSS/JS/image
    resources, the per-language JSON and search index files, every HTML topic
    referenced from each language's ``navigation.json`` and the images those
    topics reference. Paths inside the saved files are rewritten so that they
    point at the local copies.

    Every download is best effort: a file that cannot be fetched is reported
    on stdout and skipped, it never aborts the run.

    Args:
        help_url: URL of the help book, for example
            ``https://help.apple.com/appname/mac/10.9/help/``.
        output_dir: Directory that receives the offline copy.
    """

    # Root-relative server location of the shared resources and the local
    # directory (relative to the output directory) they are mirrored into.
    REMOTE_RESOURCE_ROOT = '/Library/Documentation/Resources/Flamingo/6/'
    LOCAL_RESOURCE_ROOT = 'resources/flamingo/6/'

    # Every language directory that is probed on the server.
    ALL_LANGUAGES = (
        'ar.lproj', 'ca.lproj', 'cs.lproj', 'da.lproj', 'de.lproj', 'el.lproj',
        'en.lproj', 'es.lproj', 'fi.lproj', 'fr.lproj', 'he.lproj', 'hr.lproj',
        'hu.lproj', 'id.lproj', 'it.lproj', 'ja.lproj', 'ko.lproj', 'ms.lproj',
        'nl.lproj', 'no.lproj', 'pl.lproj', 'pt_PT.lproj', 'pt.lproj', 'ro.lproj',
        'ru.lproj', 'sk.lproj', 'sv.lproj', 'th.lproj', 'tr.lproj', 'uk.lproj',
        'vi.lproj', 'zh_CN.lproj', 'zh_TW.lproj',
    )

    # Files requested from every language directory.
    LOCALE_FILES = (
        'locale-info.json',
        'navigation.json',
        'search.json',
        'search.helpindex',
    )
    SPECIAL_HTML_FILES = (
        'searchable-landing.html',
        'offline-message.html',
    )

    # Resource images, relative to the resource root.
    # NOTE(review): the "[email protected]" entries look like e-mail
    # obfuscation residue of Retina "@2x" file names (for example
    # "disclosure-open@2x.png"). They are kept verbatim because the real
    # names cannot be recovered from this copy of the script -- restore them
    # from the upstream source before relying on these downloads.
    FLAMINGO_IMAGES = (
        # Base images
        'images/[email protected]',
        'images/disclosure-open.png',
        'images/[email protected]',
        'images/lightbox-close.png',
        'images/[email protected]',
        'images/lightbox-close-hover.png',
        'images/[email protected]',
        # Red theme images
        'images/red/rightTOCarrow.png',
        'images/red/rightTOCarrow_blue.png',
        # Tangerine theme images
        'images/tangerine/[email protected]',
    )

    # "icon" entry of navigation.json; group 2 is the referenced .png path.
    _ICON_REF = re.compile(r'("icon"\s*:\s*")([^"]+\.png)(")')
    # Image sources inside a topic page.
    _IMG_SRC = re.compile(r'src="([^"]+\.(?:png|jpg|jpeg|gif))"', re.IGNORECASE)
    # Any .png loaded from the parent directory (treated as the app icon).
    _PARENT_PNG_SRC = re.compile(r'src="\.\.\/[^"]+\.png"')
    # <img> tags carrying class="topicIcon" ahead of their src attribute.
    _TOPIC_ICON_SRC = re.compile(r'(<img[^>]*class="topicIcon"[^>]*src=")[^"]+(")')

    def __init__(self, help_url, output_dir):
        self.help_url = help_url.rstrip('/')
        self.output_dir = Path(output_dir)
        self.app_name = self._app_name_from_url(help_url)
        # The provided URL already ends in /help/, so it doubles as base URL.
        self.base_url = self.help_url + '/'
        self.session = requests.Session()
        # Resolved URLs that have been attempted (successfully or not).
        self.downloaded_files = set()
        # Resolved URL -> Path it was saved to, for successful downloads only.
        self._saved_paths = {}
        self.all_languages = list(self.ALL_LANGUAGES)
        print(f"App name: {self.app_name}")
        print(f"Base URL: {self.base_url}")

    @staticmethod
    def _app_name_from_url(help_url):
        """Return the first path segment of *help_url*, or 'unknown' if empty."""
        # str.split always yields at least one element, so test the segment
        # itself rather than the length of the list.
        first_segment = urlparse(help_url).path.strip('/').split('/')[0]
        return first_segment or 'unknown'

    def _resource_dir(self):
        """Return the local directory that mirrors the shared resources."""
        return self.output_dir / 'resources' / 'flamingo' / '6'

    def create_directory_structure(self):
        """Create the output directory, one directory per language and the
        resource image directory."""
        print("Creating directory structure...")
        self.output_dir.mkdir(parents=True, exist_ok=True)
        for lang in self.all_languages:
            (self.output_dir / lang).mkdir(parents=True, exist_ok=True)
        (self._resource_dir() / 'images').mkdir(parents=True, exist_ok=True)

    def download_file(self, url, local_path):
        """Fetch *url* and store the response body at *local_path*.

        Args:
            url: Absolute URL, root-relative path (``/Library/...``) or path
                relative to ``self.base_url``.
            local_path: Destination file (``str`` or ``Path``).

        Returns:
            The file content as ``bytes``, or ``None`` when the download
            failed. Each resolved URL is requested at most once: a repeated
            call for a URL that was saved before makes sure the file is also
            present at *local_path* and returns the bytes currently on disk;
            a repeated call for a URL that failed returns ``None`` without a
            new request.
        """
        # urljoin covers all three input forms: absolute URLs are returned
        # as-is and root-relative paths are resolved against the host.
        full_url = urljoin(self.base_url, url)
        target = Path(local_path)

        if full_url in self.downloaded_files:
            return self._reuse_download(full_url, target)
        self.downloaded_files.add(full_url)

        # Deliberately broad: this is the per-file best-effort boundary, one
        # unavailable file must not abort the whole mirror.
        try:
            # Path.parent is "." for a bare file name, so this also works
            # when the output directory is the current directory.
            target.parent.mkdir(parents=True, exist_ok=True)
            print(f"Downloading {full_url} -> {target}")
            response = self.session.get(full_url, timeout=30)
            response.raise_for_status()
            target.write_bytes(response.content)
        except Exception as e:
            print(f"Error downloading {full_url}: {e}")
            return None

        self._saved_paths[full_url] = target
        return response.content

    def _reuse_download(self, full_url, target):
        """Serve an already attempted URL from disk instead of the network.

        Returns the saved bytes (copying the file to *target* when it was
        saved somewhere else), or ``None`` if the earlier attempt failed or
        the saved file is gone.
        """
        saved = self._saved_paths.get(full_url)
        if saved is None or not saved.exists():
            return None
        if saved.resolve() != target.resolve():
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(saved, target)
        return saved.read_bytes()

    def download_main_html(self):
        """Download index.html and point its resource links at local copies."""
        print("\nDownloading main HTML file...")
        index_path = self.output_dir / "index.html"
        self.download_file("index.html", index_path)
        self.update_resource_paths(index_path)

    def download_resource_files(self):
        """Download the shared CSS, JS and image files plus guessed media files.

        A ``flamingo.js`` found in the current working directory is copied
        instead of downloading the server's version.
        """
        print("\nDownloading resource files...")
        resource_dir = self._resource_dir()

        for name in ("flamingo.css", "jquery.js"):
            self.download_file(self.REMOTE_RESOURCE_ROOT + name, resource_dir / name)

        local_js = resource_dir / "flamingo.js"
        if os.path.exists("flamingo.js"):
            # Use the flamingo.js provided next to the script.
            shutil.copy("flamingo.js", local_js)
            print(f"Copied local flamingo.js to {local_js}")
        else:
            self.download_file(self.REMOTE_RESOURCE_ROOT + "flamingo.js", local_js)

        # Directories that must exist even if none of their files download.
        for directory in (
            resource_dir / "images" / "red",
            resource_dir / "images" / "tangerine",
            self.output_dir / "media",
        ):
            directory.mkdir(parents=True, exist_ok=True)

        for relative_name in self.FLAMINGO_IMAGES:
            self.download_file(self.REMOTE_RESOURCE_ROOT + relative_name,
                               resource_dir / relative_name)

        # The media names are guesses; missing ones are reported and skipped.
        for media_file in self.find_media_files():
            self.download_file(media_file, self.output_dir / media_file)

    def find_media_files(self):
        """Return guessed icon/media paths, relative to the base URL.

        Nothing is checked against the server here; the list is built purely
        from ``self.app_name``.
        """
        name = self.app_name
        possible_icons = [
            f"{name[:4]}ic.png",    # first four characters + "ic.png"
            f"{name[:5]}ic.png",    # first five characters + "ic.png"
            f"xicn{name[:4]}.png",  # "xicn" + first four characters
            f"{name}.png",          # full app name
        ]

        media_files = []
        for icon in possible_icons:
            # Try each guess both next to index.html and under media/.
            media_files.append(icon)
            media_files.append(f"media/{icon}")
        # Fixed extra candidate that is always tried.
        media_files.append("media/xicntxte.png")
        return media_files

    def download_locale_files(self):
        """Download locale-list.json and the per-language data files.

        After all languages are downloaded, the app icon named in a
        ``navigation.json`` is saved as ``icon.png``, every downloaded
        ``navigation.json`` is rewritten to reference ``icon.png`` and the
        icon is copied into every language directory.

        Returns:
            The content of ``locale-list.json`` as returned by
            ``download_file`` (``None`` if it could not be downloaded).
        """
        print("\nDownloading locale files...")
        locale_list_content = self.download_file(
            "locale-list.json", self.output_dir / "locale-list.json")

        for lang in self.all_languages:
            print(f"\nDownloading {lang} files...")
            for name in self.LOCALE_FILES:
                self.download_file(f"{lang}/{name}", self.output_dir / lang / name)

        # Resolve the icon before any navigation.json is rewritten and before
        # any per-language copy, so that languages listed ahead of en.lproj
        # receive the icon as well.
        icon_found = self._fetch_app_icon()
        master_icon = self.output_dir / "icon.png"

        for lang in self.all_languages:
            nav_json_path = self.output_dir / lang / "navigation.json"
            if nav_json_path.exists():
                # Point the icon entry at the common icon.png.
                self._rewrite_text_file(
                    nav_json_path,
                    lambda text: self._ICON_REF.sub(r'\g<1>icon.png\g<3>', text))
            if icon_found and master_icon.exists():
                (self.output_dir / lang).mkdir(parents=True, exist_ok=True)
                shutil.copy(master_icon, self.output_dir / lang / "icon.png")

        return locale_list_content

    def _fetch_app_icon(self):
        """Download the icon named in a navigation.json to ``icon.png``.

        ``en.lproj`` is consulted first, then the remaining languages in
        list order. Returns ``True`` once an icon was downloaded and copied
        to ``<output_dir>/icon.png``, ``False`` if none could be obtained.
        """
        master_icon = self.output_dir / "icon.png"
        # Stable sort: en.lproj (key False) first, the rest keep their order.
        for lang in sorted(self.all_languages, key=lambda l: l != 'en.lproj'):
            nav_json_path = self.output_dir / lang / "navigation.json"
            if not nav_json_path.exists():
                continue
            with open(nav_json_path, 'r', encoding='utf-8') as f:
                match = self._ICON_REF.search(f.read())
            if not match:
                continue

            # JSON may escape forward slashes as "\/".
            icon_ref = match.group(2).replace(r'\/', '/')
            if "../" in icon_ref:
                # Icon lives in the parent directory, next to index.html.
                label = os.path.basename(icon_ref)
                remote, local = label, self.output_dir / label
            else:
                # Icon lives in the language directory or a subdirectory of
                # it (e.g. Art/xicnimov.png).
                label = icon_ref
                remote = f"{lang}/{icon_ref}"
                local = self.output_dir / lang / icon_ref

            if self.download_file(remote, local) and local.exists():
                shutil.copy(local, master_icon)
                print(f"Found and copied icon: {label}")
                return True
        return False

    def download_html_files(self):
        """Download every HTML topic listed in each language's navigation.json.

        Image references are collected from the ``en.lproj`` topics only;
        those images are downloaded once and copied into every other
        language directory. Languages without a readable ``navigation.json``
        are skipped.
        """
        print("\nDownloading HTML content files...")
        for lang in self.all_languages:
            print(f"\nDownloading {lang} HTML files...")
            lang_dir = self.output_dir / lang
            nav_json_path = lang_dir / "navigation.json"
            if not nav_json_path.exists():
                print(f"Skipping {lang}: navigation.json not found")
                continue
            try:
                with open(nav_json_path, 'r', encoding='utf-8') as f:
                    nav_data = json.load(f)
            except (OSError, ValueError):
                # ValueError covers JSON syntax errors and decoding errors.
                print(f"Skipping {lang}: Could not parse navigation.json")
                continue

            for name in self.SPECIAL_HTML_FILES:
                self.download_file(f"{lang}/{name}", lang_dir / name)

            collect_images = lang == 'en.lproj'
            image_files = set()
            for html_file in self._collect_html_hrefs(nav_data):
                html_path = lang_dir / html_file
                self.download_file(f"{lang}/{html_file}", html_path)
                if collect_images and html_path.exists():
                    # Must happen before update_html_paths rewrites sources.
                    image_files.update(self._local_image_refs(html_path))
                self.update_html_paths(html_path)

            if collect_images and image_files:
                print("\nDownloading content images...")
                for img_file in sorted(image_files):
                    self._download_shared_image(lang, img_file)

    @staticmethod
    def _collect_html_hrefs(nav_data):
        """Return the sorted, de-duplicated ``href`` values ending in ``.html``
        found anywhere inside the parsed navigation structure."""
        hrefs = set()
        pending = [nav_data]
        while pending:
            node = pending.pop()
            if isinstance(node, dict):
                href = node.get('href')
                # Ignore null/non-string hrefs instead of crashing on them.
                if isinstance(href, str) and href.endswith('.html'):
                    hrefs.add(href)
                pending.extend(node.values())
            elif isinstance(node, list):
                pending.extend(node)
        return sorted(hrefs)

    def _local_image_refs(self, html_path):
        """Return the image sources in *html_path* that live below the
        language directory."""
        with open(html_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
        # Skipped: absolute URLs, parent-directory references (the app icon)
        # and root-relative paths, which would otherwise be joined into a
        # location outside the output directory.
        return {
            img for img in self._IMG_SRC.findall(html_content)
            if img != 'icon.png' and not img.startswith(('http', '../', '/'))
        }

    def _download_shared_image(self, lang, img_file):
        """Download *img_file* for *lang* and copy it to all other languages."""
        local_img = self.output_dir / lang / img_file
        self.download_file(f"{lang}/{img_file}", local_img)
        if not local_img.exists():
            return
        for other_lang in self.all_languages:
            if other_lang == lang:
                continue
            mirror = self.output_dir / other_lang / img_file
            mirror.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(local_img, mirror)

    @staticmethod
    def _rewrite_text_file(file_path, transform):
        """Replace the UTF-8 text of *file_path* with ``transform(text)``."""
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(transform(content))

    def update_resource_paths(self, file_path):
        """Rewrite server resource paths in *file_path* to the local mirror.

        Does nothing if *file_path* does not exist.
        """
        if not os.path.exists(file_path):
            return
        self._rewrite_text_file(
            file_path,
            lambda text: text.replace(self.REMOTE_RESOURCE_ROOT,
                                      self.LOCAL_RESOURCE_ROOT))
        print(f"Updated resource paths in {file_path}")

    def update_html_paths(self, file_path):
        """Point app-icon image sources in *file_path* at ``icon.png``.

        Two kinds of references are rewritten: any ``.png`` loaded from the
        parent directory (e.g. ``../txteic.png``) and ``<img>`` tags whose
        ``class="topicIcon"`` attribute precedes ``src``. Other images are
        left alone. Does nothing if *file_path* does not exist.
        """
        if not os.path.exists(file_path):
            return

        def point_at_icon(text):
            text = self._PARENT_PNG_SRC.sub('src="icon.png"', text)
            return self._TOPIC_ICON_SRC.sub(r'\1icon.png\2', text)

        self._rewrite_text_file(file_path, point_at_icon)
        print(f"Updated HTML paths in {file_path}")

    def run(self):
        """Execute the complete download process, step by step."""
        print(f"Starting {self.app_name} help documentation download...\n")
        print(f"URL: {self.help_url}\n")

        # Order matters: the HTML step reads the navigation.json files that
        # the locale step saves.
        self.create_directory_structure()
        self.download_main_html()
        self.download_resource_files()
        self.download_locale_files()
        self.download_html_files()

        print("\nDownload complete! The offline documentation is available at:")
        print(f"{self.output_dir}/index.html")
def default_output_dir(url):
    """Return the default output directory name for the help book at *url*.

    The name is ``<app>_help`` where ``<app>`` is the first path segment of
    the URL, or ``app_help`` when the URL has no path segment.
    """
    # str.split always yields at least one element, so test the segment
    # itself: an empty first segment means the URL carries no app name.
    app_name = urlparse(url).path.strip('/').split('/')[0]
    return f"{app_name}_help" if app_name else "app_help"


def main(argv=None):
    """Parse the command line and run the downloader.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Download Mavericks (10.9) Apple help documentation for offline use",
        epilog="Example: %(prog)s https://help.apple.com/keychainaccess/mac/10.9/help/ -o keychain_help"
    )
    parser.add_argument("url",
                        help="The help URL (e.g., https://help.apple.com/textedit/mac/10.9/help/)")
    parser.add_argument("--output", "-o", default=None,
                        help="Output directory (default: app_name_help)")
    args = parser.parse_args(argv)

    # Derive the output directory from the URL when none was given.
    if args.output is None:
        args.output = default_output_dir(args.url)

    downloader = MavericksHelpDownloader(args.url, args.output)
    downloader.run()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment