Wowfunhappy · June 27, 2025 16:40
diff --git a/mavericks_help_downloader b/mavericks_help_downloader
 #!/usr/bin/env python3
 """
 This script downloads and saves an offline copy of any Mavericks (10.9) Apple Help documentation.
 It creates a directory structure identical to the original help system.
 """

 import os
 import re
 import json
 import requests
 from urllib.parse import urljoin, urlparse, urlunparse
 from pathlib import Path
 import shutil
 import argparse


 class MavericksHelpDownloader:
    def __init__(self, help_url, output_dir):
        self.help_url = help_url.rstrip('/')
        self.output_dir = Path(output_dir)
        
        # Parse the URL to get the app name
        parsed_url = urlparse(help_url)
        path_parts = parsed_url.path.strip('/').split('/')
        
        # Extract app name from URL (e.g., 'textedit' or 'keychainaccess')
        # URL format: https://help.apple.com/appname/mac/10.9/help/
        if len(path_parts) >= 1:
            self.app_name = path_parts[0]
        else:
            self.app_name = 'unknown'
        
        # Use the provided URL as base URL (already includes /help/)
        self.base_url = self.help_url + '/'
        
        self.session = requests.Session()
        self.downloaded_files = set()
        
        # List of all possible language directories
        self.all_languages = [
            'ar.lproj', 'ca.lproj', 'cs.lproj', 'da.lproj', 'de.lproj', 'el.lproj',
            'en.lproj', 'es.lproj', 'fi.lproj', 'fr.lproj', 'he.lproj', 'hr.lproj',
            'hu.lproj', 'id.lproj', 'it.lproj', 'ja.lproj', 'ko.lproj', 'ms.lproj',
            'nl.lproj', 'no.lproj', 'pl.lproj', 'pt_PT.lproj', 'pt.lproj', 'ro.lproj',
            'ru.lproj', 'sk.lproj', 'sv.lproj', 'th.lproj', 'tr.lproj', 'uk.lproj',
            'vi.lproj', 'zh_CN.lproj', 'zh_TW.lproj'
        ]
        
        print(f"App name: {self.app_name}")
        print(f"Base URL: {self.base_url}")

    def create_directory_structure(self):
        """Create the initial directory structure."""
        print("Creating directory structure...")
        
        # Create main output directory
        os.makedirs(self.output_dir, exist_ok=True)
        
        # Create all language directories
        for lang in self.all_languages:
            os.makedirs(self.output_dir / lang, exist_ok=True)
        
        # Create resources/flamingo/6/images directory
        os.makedirs(self.output_dir / 'resources' / 'flamingo' / '6' / 'images', exist_ok=True)

    def download_file(self, url, local_path):
        """Download a file from URL and save it to local_path."""
        # Skip if already downloaded
        if url in self.downloaded_files:
            return
        
        # Add to downloaded set
        self.downloaded_files.add(url)
        
        # Create full URL if relative
        if not url.startswith('http'):
            if url.startswith('/'):
                # Absolute path from root
                parsed_base = urlparse(self.base_url)
                full_url = urlunparse((
                    parsed_base.scheme,
                    parsed_base.netloc,
                    url,
                    '', '', ''
                ))
            else:
                # Relative path
                full_url = urljoin(self.base_url, url)
        else:
            full_url = url
            
        # Convert Path object to string to avoid escaping issues
        local_path_str = str(local_path)
        
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(local_path_str), exist_ok=True)
        
        # Download the file
        try:
            print(f"Downloading {full_url} -> {local_path_str}")
            response = self.session.get(full_url, timeout=30)
            response.raise_for_status()
            
            with open(local_path_str, 'wb') as f:
                f.write(response.content)
                
            return response.content
        except Exception as e:
            print(f"Error downloading {full_url}: {e}")
            return None

    def download_main_html(self):
        """Download the main index.html file."""
        print("\nDownloading main HTML file...")
        
        # Download index.html
        self.download_file("index.html", self.output_dir / "index.html")
        
        # Update paths in index.html
        self.update_resource_paths(self.output_dir / "index.html")
        
    def download_resource_files(self):
        """Download CSS, JS, and image files."""
        print("\nDownloading resource files...")

        # Download and save flamingo.css
        self.download_file("/Library/Documentation/Resources/Flamingo/6/flamingo.css",
                          self.output_dir / "resources" / "flamingo" / "6" / "flamingo.css")

        # Download and save jquery.js
        self.download_file("/Library/Documentation/Resources/Flamingo/6/jquery.js",
                          self.output_dir / "resources" / "flamingo" / "6" / "jquery.js")

        # Use provided flamingo.js or download it
        if os.path.exists("flamingo.js"):
            # Copy the provided flamingo.js
            shutil.copy("flamingo.js",
                       self.output_dir / "resources" / "flamingo" / "6" / "flamingo.js")
            print(f"Copied local flamingo.js to {self.output_dir / 'resources' / 'flamingo' / '6' / 'flamingo.js'}")
        else:
            # Download flamingo.js
            self.download_file("/Library/Documentation/Resources/Flamingo/6/flamingo.js",
                              self.output_dir / "resources" / "flamingo" / "6" / "flamingo.js")

        # Create required image directories
        os.makedirs(self.output_dir / "resources" / "flamingo" / "6" / "images" / "red", exist_ok=True)
        os.makedirs(self.output_dir / "resources" / "flamingo" / "6" / "images" / "tangerine", exist_ok=True)
        os.makedirs(self.output_dir / "media", exist_ok=True)

        # Download all required images
        image_files = [
            # Base images
            ("/Library/Documentation/Resources/Flamingo/6/images/[email protected]",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "[email protected]"),
            ("/Library/Documentation/Resources/Flamingo/6/images/disclosure-open.png",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "disclosure-open.png"),
            ("/Library/Documentation/Resources/Flamingo/6/images/[email protected]",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "[email protected]"),
            ("/Library/Documentation/Resources/Flamingo/6/images/lightbox-close.png",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "lightbox-close.png"),
            ("/Library/Documentation/Resources/Flamingo/6/images/[email protected]",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "[email protected]"),
            ("/Library/Documentation/Resources/Flamingo/6/images/lightbox-close-hover.png",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "lightbox-close-hover.png"),
            ("/Library/Documentation/Resources/Flamingo/6/images/[email protected]",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "[email protected]"),

            # Red theme images
            ("/Library/Documentation/Resources/Flamingo/6/images/red/rightTOCarrow.png",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "red" / "rightTOCarrow.png"),
            ("/Library/Documentation/Resources/Flamingo/6/images/red/rightTOCarrow_blue.png",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "red" / "rightTOCarrow_blue.png"),

            # Tangerine theme images
            ("/Library/Documentation/Resources/Flamingo/6/images/tangerine/[email protected]",
             self.output_dir / "resources" / "flamingo" / "6" / "images" / "tangerine" / "[email protected]"),
        ]

        # Download each image
        for src, dest in image_files:
            self.download_file(src, dest)

        # Download media files - check if they exist first
        media_files = self.find_media_files()
        for media_file in media_files:
            local_path = self.output_dir / media_file
            self.download_file(media_file, local_path)

    def find_media_files(self):
        """Find media files referenced in the help content."""
        media_files = []
        
        # Look for icon files
        # Common patterns for app icons
        possible_icons = [
            f"{self.app_name[:4]}ic.png",  # e.g., txteic.png for textedit
            f"{self.app_name[:5]}ic.png",  # e.g., keychic.png for keychainaccess
            f"xicn{self.app_name[:4]}.png",  # e.g., xicntxte.png
            f"{self.app_name}.png",  # e.g., keychainaccess.png
        ]
        
        # Try to find icon file
        for icon in possible_icons:
            media_files.append(icon)
            media_files.append(f"media/{icon}")
        
        # Add any other common media patterns
        media_files.append("media/xicntxte.png")  # Common pattern
        
        return media_files

    def download_locale_files(self):
        """Download locale-list.json and other locale-specific files."""
        print("\nDownloading locale files...")
        
        # Download locale-list.json
        locale_list_content = self.download_file("locale-list.json", 
                                               self.output_dir / "locale-list.json")
        
        # Track if we found the icon
        icon_found = False
        
        # Download locale files for all languages
        for lang in self.all_languages:
            print(f"\nDownloading {lang} files...")
            
            # Download locale files
            self.download_file(f"{lang}/locale-info.json", 
                              self.output_dir / lang / "locale-info.json")
            
            self.download_file(f"{lang}/navigation.json", 
                              self.output_dir / lang / "navigation.json")
            
            self.download_file(f"{lang}/search.json", 
                              self.output_dir / lang / "search.json")
            
            # Download search.helpindex file
            self.download_file(f"{lang}/search.helpindex", 
                              self.output_dir / lang / "search.helpindex")
            
            # For the first language with a valid navigation.json, extract and download the icon
            if not icon_found and lang == 'en.lproj':
                nav_json_path = self.output_dir / lang / "navigation.json"
                if nav_json_path.exists():
                    with open(nav_json_path, 'r', encoding='utf-8') as f:
                        nav_data_original = f.read()
                    
                    # Extract the original icon name
                    icon_match = re.search(r'"icon":"(?:\.\.\/)?([^"]+\.png)"', nav_data_original)
                    if icon_match:
                        original_icon = icon_match.group(1)
                        # Remove escaped forward slashes from JSON
                        original_icon = original_icon.replace(r'\/', '/')
                        icon_filename = os.path.basename(original_icon)
                        
                        # Handle different icon path patterns
                        if "/" in original_icon and "../" not in original_icon:
                            # Icon is in a subdirectory (e.g., Art/xicnimov.png)
                            icon_path = f"{lang}/{original_icon}"
                            local_icon_path = os.path.join(str(self.output_dir), lang, original_icon)
                            
                            if self.download_file(icon_path, local_icon_path):
                                if os.path.exists(local_icon_path):
                                    shutil.copy(local_icon_path, self.output_dir / "icon.png")
                                    print(f"Found and copied icon: {original_icon}")
                                    icon_found = True
                        elif "../" in original_icon:
                            # Icon is in parent directory
                            if self.download_file(icon_filename, self.output_dir / icon_filename):
                                shutil.copy(self.output_dir / icon_filename, self.output_dir / "icon.png")
                                print(f"Found and copied icon: {icon_filename}")
                                icon_found = True
                        else:
                            # Icon is in language directory
                            if self.download_file(f"{lang}/{original_icon}", self.output_dir / lang / original_icon):
                                shutil.copy(self.output_dir / lang / original_icon, self.output_dir / "icon.png")
                                print(f"Found and copied icon: {original_icon}")
                                icon_found = True
            
            # Fix icon paths in navigation.json for each language
            nav_json_path = self.output_dir / lang / "navigation.json"
            if nav_json_path.exists():
                with open(nav_json_path, 'r', encoding='utf-8') as f:
                    nav_data = f.read()
                
                # Update icon paths to use the common icon.png
                nav_data = re.sub(r'"icon":"[^"]+\.png"', '"icon":"icon.png"', nav_data)
                
                with open(nav_json_path, 'w', encoding='utf-8') as f:
                    f.write(nav_data)
                
                # Copy icon to each language directory
                if icon_found and os.path.exists(self.output_dir / "icon.png"):
                    shutil.copy(self.output_dir / "icon.png", self.output_dir / lang / "icon.png")
        
        return locale_list_content


    def download_html_files(self):
        """Download all HTML content files."""
        print("\nDownloading HTML content files...")
        
        # Download HTML files for all languages
        for lang in self.all_languages:
            print(f"\nDownloading {lang} HTML files...")
            
            # Get the list of HTML files from navigation.json
            nav_json_path = self.output_dir / lang / "navigation.json"
            if not nav_json_path.exists():
                print(f"Skipping {lang}: navigation.json not found")
                continue
            
            try:
                with open(nav_json_path, 'r', encoding='utf-8') as f:
                    nav_data = json.load(f)
            except:
                print(f"Skipping {lang}: Could not parse navigation.json")
                continue
            
            # Extract HTML files from navigation.json topics
            html_files = []
            
            def extract_hrefs(obj):
                """Recursively extract href values from navigation structure."""
                if isinstance(obj, dict):
                    if 'href' in obj and obj['href'].endswith('.html'):
                        html_files.append(obj['href'])
                    for value in obj.values():
                        extract_hrefs(value)
                elif isinstance(obj, list):
                    for item in obj:
                        extract_hrefs(item)
            
            extract_hrefs(nav_data)
            
            # Remove duplicates and sort
            html_files = sorted(set(html_files))
            
            # Download special files
            special_files = [
                "searchable-landing.html",
                "offline-message.html"
            ]
            
            for file in special_files:
                self.download_file(f"{lang}/{file}", 
                                  self.output_dir / lang / file)
            
            # Download each HTML file and collect image references
            image_files = set()
            
            for html_file in html_files:
                html_path = self.output_dir / lang / html_file
                self.download_file(f"{lang}/{html_file}", html_path)
                
                # Extract image references from HTML (only for en.lproj to avoid duplicates)
                if lang == 'en.lproj' and html_path.exists():
                    with open(html_path, 'r', encoding='utf-8') as f:
                        html_content = f.read()
                    
                    # Find all image sources in the HTML
                    img_matches = re.findall(r'src="([^"]+\.(?:png|jpg|jpeg|gif))"', html_content, re.IGNORECASE)
                    for img in img_matches:
                        # Skip absolute URLs and icon.png references
                        if not img.startswith('http') and img != 'icon.png' and not img.startswith('../'):
                            image_files.add(img)
                
                # Fix paths in the HTML file
                self.update_html_paths(html_path)
            
            # Download Art directory images (only once from en.lproj)
            if lang == 'en.lproj' and image_files:
                print("\nDownloading content images...")
                for img_file in sorted(image_files):
                    img_path = f"{lang}/{img_file}"
                    local_img_path = os.path.join(str(self.output_dir), lang, img_file)
                    self.download_file(img_path, local_img_path)
                    
                    # Copy images to all other language directories
                    if os.path.exists(local_img_path):
                        for other_lang in self.all_languages:
                            if other_lang != lang:
                                other_img_path = os.path.join(str(self.output_dir), other_lang, img_file)
                                os.makedirs(os.path.dirname(other_img_path), exist_ok=True)
                                shutil.copy(local_img_path, other_img_path)
    
    def update_resource_paths(self, file_path):
        """Rewrite absolute Flamingo resource paths in a saved page to relative local ones.

        The file is modified in place; nothing happens if it does not exist.
        """
        if not os.path.exists(file_path):
            return

        page = Path(file_path)
        original = page.read_text(encoding='utf-8')
        rewritten = original.replace(
            '/Library/Documentation/Resources/Flamingo/6/',
            'resources/flamingo/6/',
        )
        page.write_text(rewritten, encoding='utf-8')

        print(f"Updated resource paths in {file_path}")
    
    def update_html_paths(self, file_path):
        """Point app-icon image references in a saved topic page at icon.png.

        The file is modified in place; nothing happens if it does not exist.
        """
        if not os.path.exists(file_path):
            return

        # (pattern, replacement) pairs, applied in this order.
        rewrites = (
            # Any .png loaded from the parent directory (e.g. ../txteic.png).
            (r'src="\.\.\/[^"]+\.png"', 'src="icon.png"'),
            # <img> tags whose class="topicIcon" attribute precedes src.
            (r'(<img[^>]*class="topicIcon"[^>]*src=")[^"]+(")', r'\1icon.png\2'),
        )

        page = Path(file_path)
        content = page.read_text(encoding='utf-8')
        for pattern, replacement in rewrites:
            content = re.sub(pattern, replacement, content)
        page.write_text(content, encoding='utf-8')

        print(f"Updated HTML paths in {file_path}")
    
    def run(self):
        """Execute the download process."""
        print(f"Starting {self.app_name} help documentation download...\n")
        print(f"URL: {self.help_url}\n")
        
        # Create directory structure
        self.create_directory_structure()
        
        # Download main HTML file
        self.download_main_html()
        
        # Download resource files
        self.download_resource_files()
        
        # Download locale files
        self.download_locale_files()
        
        # Download HTML files
        self.download_html_files()
        
        print(f"\nDownload complete! The offline documentation is available at:")
        print(f"{self.output_dir}/index.html")


 if __name__ == "__main__":
    # Command-line entry point: parse the help URL and output directory, then
    # run the downloader.
    parser = argparse.ArgumentParser(
        description="Download Mavericks (10.9) Apple help documentation for offline use",
        epilog="Example: %(prog)s https://help.apple.com/keychainaccess/mac/10.9/help/ -o keychain_help"
    )
    parser.add_argument("url",
                        help="The help URL (e.g., https://help.apple.com/textedit/mac/10.9/help/)")
    parser.add_argument("--output", "-o", default=None,
                        help="Output directory (default: app_name_help)")

    args = parser.parse_args()

    # Default the output directory to "<appname>_help", where the app name is
    # the first non-empty path segment of the URL. str.split never returns an
    # empty list, so empty segments are filtered out to make the "app_help"
    # fallback reachable for URLs without a path.
    if args.output is None:
        url_segments = [part for part in urlparse(args.url).path.split('/') if part]
        args.output = f"{url_segments[0]}_help" if url_segments else "app_help"

    downloader = MavericksHelpDownloader(args.url, args.output)
    downloader.run()
	#!/usr/bin/env python3
	"""
	This script downloads and saves an offline copy of any Mavericks (10.9) Apple Help documentation.
	It creates a directory structure identical to the original help system.
	"""

	import os
	import re
	import json
	import requests
	from urllib.parse import urljoin, urlparse, urlunparse
	from pathlib import Path
	import shutil
	import argparse


	class MavericksHelpDownloader:
	def __init__(self, help_url, output_dir):
	self.help_url = help_url.rstrip('/')
	self.output_dir = Path(output_dir)

	# Parse the URL to get the app name
	parsed_url = urlparse(help_url)
	path_parts = parsed_url.path.strip('/').split('/')

	# Extract app name from URL (e.g., 'textedit' or 'keychainaccess')
	# URL format: https://help.apple.com/appname/mac/10.9/help/
	if len(path_parts) >= 1:
	self.app_name = path_parts[0]
	else:
	self.app_name = 'unknown'

	# Use the provided URL as base URL (already includes /help/)
	self.base_url = self.help_url + '/'

	self.session = requests.Session()
	self.downloaded_files = set()

	# List of all possible language directories
	self.all_languages = [
	'ar.lproj', 'ca.lproj', 'cs.lproj', 'da.lproj', 'de.lproj', 'el.lproj',
	'en.lproj', 'es.lproj', 'fi.lproj', 'fr.lproj', 'he.lproj', 'hr.lproj',
	'hu.lproj', 'id.lproj', 'it.lproj', 'ja.lproj', 'ko.lproj', 'ms.lproj',
	'nl.lproj', 'no.lproj', 'pl.lproj', 'pt_PT.lproj', 'pt.lproj', 'ro.lproj',
	'ru.lproj', 'sk.lproj', 'sv.lproj', 'th.lproj', 'tr.lproj', 'uk.lproj',
	'vi.lproj', 'zh_CN.lproj', 'zh_TW.lproj'
	]

	print(f"App name: {self.app_name}")
	print(f"Base URL: {self.base_url}")

	def create_directory_structure(self):
	"""Create the initial directory structure."""
	print("Creating directory structure...")

	# Create main output directory
	os.makedirs(self.output_dir, exist_ok=True)

	# Create all language directories
	for lang in self.all_languages:
	os.makedirs(self.output_dir / lang, exist_ok=True)

	# Create resources/flamingo/6/images directory
	os.makedirs(self.output_dir / 'resources' / 'flamingo' / '6' / 'images', exist_ok=True)

	def download_file(self, url, local_path):
	"""Download a file from URL and save it to local_path."""
	# Skip if already downloaded
	if url in self.downloaded_files:
	return

	# Add to downloaded set
	self.downloaded_files.add(url)

	# Create full URL if relative
	if not url.startswith('http'):
	if url.startswith('/'):
	# Absolute path from root
	parsed_base = urlparse(self.base_url)
	full_url = urlunparse((
	parsed_base.scheme,
	parsed_base.netloc,
	url,
	'', '', ''
	))
	else:
	# Relative path
	full_url = urljoin(self.base_url, url)
	else:
	full_url = url

	# Convert Path object to string to avoid escaping issues
	local_path_str = str(local_path)

	# Create directory if it doesn't exist
	os.makedirs(os.path.dirname(local_path_str), exist_ok=True)

	# Download the file
	try:
	print(f"Downloading {full_url} -> {local_path_str}")
	response = self.session.get(full_url, timeout=30)
	response.raise_for_status()

	with open(local_path_str, 'wb') as f:
	f.write(response.content)

	return response.content
	except Exception as e:
	print(f"Error downloading {full_url}: {e}")
	return None

	def download_main_html(self):
	    """Fetch the top-level index.html, then rewrite its resource paths for local use."""
	    print("\nDownloading main HTML file...")

	    index_path = self.output_dir / "index.html"
	    self.download_file("index.html", index_path)
	    # Rewrites the absolute Flamingo resource paths in the saved page.
	    self.update_resource_paths(index_path)

	def download_resource_files(self):
	    """Fetch the shared Flamingo CSS, JS and images, plus the guessed app-icon files."""
	    print("\nDownloading resource files...")

	    remote_root = "/Library/Documentation/Resources/Flamingo/6/"
	    local_root = self.output_dir / "resources" / "flamingo" / "6"
	    images_dir = local_root / "images"

	    for name in ("flamingo.css", "jquery.js"):
	        self.download_file(remote_root + name, local_root / name)

	    # A flamingo.js in the current working directory is used in place of
	    # the remote copy.
	    local_js = local_root / "flamingo.js"
	    if not os.path.exists("flamingo.js"):
	        self.download_file(remote_root + "flamingo.js", local_js)
	    else:
	        shutil.copy("flamingo.js", local_js)
	        print(f"Copied local flamingo.js to {local_js}")

	    for directory in (images_dir / "red", images_dir / "tangerine", self.output_dir / "media"):
	        os.makedirs(directory, exist_ok=True)

	    # Image paths relative to the Flamingo images directory, in download order.
	    # NOTE(review): the "[email protected]" entries look like retina ("...@2x.png")
	    # file names that were mangled before this file was saved; the real names
	    # cannot be recovered from here and should be confirmed. Repeated entries
	    # are harmless because download_file skips URLs it has already attempted.
	    image_names = [
	        # Base images
	        "[email protected]",
	        "disclosure-open.png",
	        "[email protected]",
	        "lightbox-close.png",
	        "[email protected]",
	        "lightbox-close-hover.png",
	        "[email protected]",
	        # Red theme images
	        "red/rightTOCarrow.png",
	        "red/rightTOCarrow_blue.png",
	        # Tangerine theme images
	        "tangerine/[email protected]",
	    ]
	    for name in image_names:
	        self.download_file(remote_root + "images/" + name, images_dir / name)

	    # The media candidates are guesses; most of them are expected to fail.
	    for media_file in self.find_media_files():
	        self.download_file(media_file, self.output_dir / media_file)

	def find_media_files(self):
	"""Find media files referenced in the help content."""
	media_files = []

	# Look for icon files
	# Common patterns for app icons
	possible_icons = [
	f"{self.app_name[:4]}ic.png", # e.g., txteic.png for textedit
	f"{self.app_name[:5]}ic.png", # e.g., keychic.png for keychainaccess
	f"xicn{self.app_name[:4]}.png", # e.g., xicntxte.png
	f"{self.app_name}.png", # e.g., keychainaccess.png
	]

	# Try to find icon file
	for icon in possible_icons:
	media_files.append(icon)
	media_files.append(f"media/{icon}")

	# Add any other common media patterns
	media_files.append("media/xicntxte.png") # Common pattern

	return media_files

	def download_locale_files(self):
	    """Download locale-list.json and the per-language metadata files.

	    For every language in self.all_languages this fetches
	    locale-info.json, navigation.json, search.json and search.helpindex.
	    The app icon named in en.lproj/navigation.json is downloaded and saved
	    as icon.png in the output root. Each saved navigation.json is then
	    rewritten to reference "icon.png", and the icon is copied into every
	    language directory that has a navigation.json.

	    Returns:
	        Whatever download_file returned for locale-list.json: the raw
	        bytes, or None if that download failed or was skipped.
	    """
	    print("\nDownloading locale files...")

	    locale_list_content = self.download_file("locale-list.json",
	                                             self.output_dir / "locale-list.json")

	    locale_files = ("locale-info.json", "navigation.json", "search.json", "search.helpindex")
	    shared_icon = self.output_dir / "icon.png"
	    icon_found = False
	    rewritten_dirs = []  # language directories whose navigation.json exists

	    for lang in self.all_languages:
	        print(f"\nDownloading {lang} files...")
	        lang_dir = self.output_dir / lang

	        for name in locale_files:
	            self.download_file(f"{lang}/{name}", lang_dir / name)

	        nav_json_path = lang_dir / "navigation.json"
	        if not nav_json_path.exists():
	            continue
	        nav_data = nav_json_path.read_text(encoding='utf-8')

	        # Only the English navigation file is consulted for the icon name.
	        if not icon_found and lang == 'en.lproj':
	            icon_found = self._download_app_icon(lang, nav_data)

	        # Point every icon reference at the common icon.png.
	        nav_data = re.sub(r'"icon":"[^"]+\.png"', '"icon":"icon.png"', nav_data)
	        nav_json_path.write_text(nav_data, encoding='utf-8')
	        rewritten_dirs.append(lang_dir)

	    # Copy after the loop: languages that sort before en.lproj are
	    # processed before the icon is known and would otherwise be left
	    # without the icon.png their navigation.json now points at.
	    if icon_found and shared_icon.exists():
	        for lang_dir in rewritten_dirs:
	            shutil.copy(shared_icon, lang_dir / "icon.png")

	    return locale_list_content

	def _download_app_icon(self, lang, nav_text):
	    """Download the icon named in nav_text and save it as icon.png in the output root.

	    Args:
	        lang: Language directory the navigation text came from.
	        nav_text: Raw text of that language's navigation.json.

	    Returns:
	        True if the icon was downloaded and is available as icon.png,
	        False otherwise.
	    """
	    # Capture the whole value: a leading "../" decides where the icon lives,
	    # so it must not be stripped by the pattern.
	    icon_match = re.search(r'"icon":"([^"]+\.png)"', nav_text)
	    if icon_match is None:
	        return False

	    # JSON may escape forward slashes as "\/".
	    icon_ref = icon_match.group(1).replace(r'\/', '/')

	    if "../" in icon_ref:
	        # The icon sits above the language directory, i.e. at the help root.
	        icon_label = os.path.basename(icon_ref)
	        remote_path = icon_label
	        local_path = self.output_dir / icon_label
	    else:
	        # The icon is inside the language directory, possibly in a
	        # subdirectory such as Art/.
	        icon_label = icon_ref
	        remote_path = f"{lang}/{icon_ref}"
	        local_path = self.output_dir / lang / icon_ref

	    if not self.download_file(remote_path, local_path):
	        return False
	    if not os.path.exists(local_path):
	        return False

	    shared_icon = self.output_dir / "icon.png"
	    # shutil.copy raises SameFileError when source and destination coincide.
	    if local_path != shared_icon:
	        shutil.copy(local_path, shared_icon)
	    print(f"Found and copied icon: {icon_label}")
	    return True


	def download_html_files(self):
	    """Download every HTML topic page, plus the images the English pages use.

	    For each entry in ``self.all_languages`` that has a local
	    ``navigation.json``, this method:

	    * collects every string ``href`` ending in ``.html`` from the
	      navigation tree (duplicates removed, sorted),
	    * downloads the two fixed special pages and then each topic page via
	      ``self.download_file``,
	    * calls ``self.update_html_paths`` on each topic page.

	    Image references are harvested from the ``en.lproj`` pages only. Those
	    images are downloaded once and then copied into every other language
	    directory. References that are absolute URLs, start with ``../``,
	    start with ``/`` or are exactly ``icon.png`` are ignored.

	    Languages whose ``navigation.json`` is missing or unreadable are
	    skipped with a message.
	    """
	    print("\nDownloading HTML content files...")

	    # The alternation needs a bare ``|``: an escaped ``\|`` only matches a
	    # literal pipe character, so no image would ever be found.
	    image_src_pattern = re.compile(
	        r'src="([^"]+\.(?:png|jpg|jpeg|gif))"', re.IGNORECASE)
	    special_files = ("searchable-landing.html", "offline-message.html")

	    def collect_html_hrefs(node, found):
	        """Recursively add every string ``href`` ending in .html to *found*."""
	        if isinstance(node, dict):
	            href = node.get('href')
	            # href may be null or non-string in the JSON; only strings count.
	            if isinstance(href, str) and href.endswith('.html'):
	                found.add(href)
	            for value in node.values():
	                collect_html_hrefs(value, found)
	        elif isinstance(node, list):
	            for item in node:
	                collect_html_hrefs(item, found)

	    for lang in self.all_languages:
	        print(f"\nDownloading {lang} HTML files...")
	        lang_dir = self.output_dir / lang

	        # The list of topic pages comes from the language's navigation.json
	        nav_json_path = lang_dir / "navigation.json"
	        if not nav_json_path.exists():
	            print(f"Skipping {lang}: navigation.json not found")
	            continue

	        try:
	            with open(nav_json_path, 'r', encoding='utf-8') as f:
	                nav_data = json.load(f)
	        except (OSError, ValueError):
	            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	            print(f"Skipping {lang}: Could not parse navigation.json")
	            continue

	        hrefs = set()
	        collect_html_hrefs(nav_data, hrefs)
	        html_files = sorted(hrefs)

	        # Download special files
	        for special in special_files:
	            self.download_file(f"{lang}/{special}", lang_dir / special)

	        # Download each HTML file and collect image references
	        image_files = set()
	        is_english = lang == 'en.lproj'

	        for html_file in html_files:
	            html_path = lang_dir / html_file
	            self.download_file(f"{lang}/{html_file}", html_path)

	            # Images are harvested from en.lproj only, to avoid duplicates.
	            # This must happen before update_html_paths rewrites the page.
	            if is_english and html_path.exists():
	                with open(html_path, 'r', encoding='utf-8') as f:
	                    html_content = f.read()

	                for img in image_src_pattern.findall(html_content):
	                    # Skip absolute URLs, parent-directory and root-absolute
	                    # paths (they do not live under the language directory)
	                    # and the shared icon.png.
	                    if img == 'icon.png' or img.startswith(('http', '../', '/')):
	                        continue
	                    image_files.add(img)

	            # Fix paths in the HTML file
	            self.update_html_paths(html_path)

	        # Download content images (only once, from en.lproj)
	        if is_english and image_files:
	            print("\nDownloading content images...")
	            for img_file in sorted(image_files):
	                local_img_path = lang_dir / img_file
	                self.download_file(f"{lang}/{img_file}", local_img_path)

	                if not local_img_path.exists():
	                    continue

	                # Copy the image to all other language directories
	                for other_lang in self.all_languages:
	                    if other_lang == lang:
	                        continue
	                    other_img_path = self.output_dir / other_lang / img_file
	                    other_img_path.parent.mkdir(parents=True, exist_ok=True)
	                    shutil.copy(local_img_path, other_img_path)

	def update_resource_paths(self, file_path):
	    """Rewrite Flamingo resource references in a file to local relative paths.

	    Every occurrence of ``/Library/Documentation/Resources/Flamingo/6/`` in
	    the file at *file_path* is replaced with ``resources/flamingo/6/`` and
	    the file is written back in place (UTF-8). Nothing happens when the
	    file does not exist.
	    """
	    system_prefix = '/Library/Documentation/Resources/Flamingo/6/'
	    local_prefix = 'resources/flamingo/6/'

	    if os.path.exists(file_path):
	        with open(file_path, 'r', encoding='utf-8') as source:
	            original_text = source.read()

	        # Compute the new text before reopening, so the file is only
	        # truncated once the replacement is ready to be written.
	        rewritten_text = original_text.replace(system_prefix, local_prefix)

	        with open(file_path, 'w', encoding='utf-8') as target:
	            target.write(rewritten_text)

	        print(f"Updated resource paths in {file_path}")

	def update_html_paths(self, file_path):
	    """Point app-icon references in an HTML file at the shared ``icon.png``.

	    Two kinds of reference are rewritten in place (UTF-8):

	    * any ``src="../<something>.png"`` (e.g. ``../txteic.png``), and
	    * the ``src`` of any ``<img>`` tag carrying ``class="topicIcon"``,
	      regardless of which other attributes sit before or between the
	      ``class`` and ``src`` attributes.

	    All other image references are left untouched. Nothing happens when
	    the file at *file_path* does not exist.
	    """
	    if not os.path.exists(file_path):
	        return

	    with open(file_path, 'r', encoding='utf-8') as f:
	        content = f.read()

	    # Only replace the app icon references, not all images.
	    # Parent-directory PNGs are likely the app icon (e.g., ../txteic.png).
	    content = re.sub(r'src="\.\.\/[^"]+\.png"', 'src="icon.png"', content)

	    # Also handle img tags with class="topicIcon". ``[^>]*?`` keeps the
	    # match inside a single tag while allowing any attributes around
	    # ``class``; a bare ``[^>]`` would only permit exactly one character.
	    # ``\s`` before ``src`` avoids hitting attributes such as ``data-src``.
	    content = re.sub(
	        r'(<img[^>]*?class="topicIcon"[^>]*?\ssrc=")[^"]+(")',
	        r'\1icon.png\2',
	        content,
	    )

	    with open(file_path, 'w', encoding='utf-8') as f:
	        f.write(content)

	    print(f"Updated HTML paths in {file_path}")

	def run(self):
	    """Run the complete download pipeline, stage by stage.

	    Prints a start banner with the app name and help URL, invokes the five
	    download stages in a fixed order, then prints where the resulting
	    ``index.html`` can be found.
	    """
	    print(f"Starting {self.app_name} help documentation download...\n")
	    print(f"URL: {self.help_url}\n")

	    # Stages run strictly in this order.
	    pipeline = (
	        self.create_directory_structure,  # directory structure
	        self.download_main_html,          # main HTML file
	        self.download_resource_files,     # resource files
	        self.download_locale_files,       # locale files
	        self.download_html_files,         # HTML files
	    )
	    for stage in pipeline:
	        stage()

	    print("\nDownload complete! The offline documentation is available at:")
	    print(f"{self.output_dir}/index.html")


	def default_output_dir(url):
	    """Return the default output directory name derived from a help URL.

	    The first path segment of *url* is used as the app name, so
	    ``https://help.apple.com/textedit/mac/10.9/help/`` gives
	    ``textedit_help``. When the URL has no path segment at all,
	    ``app_help`` is returned.
	    """
	    # str.split always yields at least one element, so the segment itself
	    # (not the list) must be tested to detect an empty path.
	    app_name = urlparse(url).path.strip('/').split('/')[0]
	    return f"{app_name}_help" if app_name else "app_help"


	def main(argv=None):
	    """Parse command-line arguments and run the downloader.

	    Args:
	        argv: Argument list to parse; ``None`` lets argparse read
	            ``sys.argv``.
	    """
	    parser = argparse.ArgumentParser(
	        description="Download Mavericks (10.9) Apple help documentation for offline use",
	        epilog="Example: %(prog)s https://help.apple.com/keychainaccess/mac/10.9/help/ -o keychain_help"
	    )
	    parser.add_argument("url",
	                        help="The help URL (e.g., https://help.apple.com/textedit/mac/10.9/help/)")
	    parser.add_argument("--output", "-o", default=None,
	                        help="Output directory (default: app_name_help)")

	    args = parser.parse_args(argv)

	    # Fall back to a name derived from the URL when no output is given
	    output_dir = args.output
	    if output_dir is None:
	        output_dir = default_output_dir(args.url)

	    downloader = MavericksHelpDownloader(args.url, output_dir)
	    downloader.run()


	if __name__ == "__main__":
	    main()
No results found