Created
September 23, 2025 00:07
-
-
Save davydmaker/b14acf98e13c58f9cc679fad9686e105 to your computer and use it in GitHub Desktop.
Directory diff tool with interactive reporting and complete analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Script to recursively compare all files between two directories. | |
| Generates text reports by default, with option for HTML format. | |
| Usage: | |
| python directory_diff.py <base> <compare> [-o output] [--format {text,html}] [--ignore PATTERN ...] | |
| Examples: | |
| python directory_diff.py base compare # Text report (default) | |
| python directory_diff.py base compare -o my-report # Save as my-report.txt | |
| python directory_diff.py base compare --format html # HTML report | |
| python directory_diff.py base compare --format html -o my-report # Save as my-report.html | |
| """ | |
import argparse
import difflib
import fnmatch
import hashlib
import html
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
class DirectoryComparator:
    """
    Compare the files of two directory trees recursively.

    Files are matched by their path relative to each root, compared by MD5
    hash and, when the hashes differ, by a unified text diff. The result can
    be rendered as a plain-text or an HTML report.
    """

    # Patterns that are always ignored, in addition to user-supplied ones.
    DEFAULT_IGNORE_PATTERNS = frozenset({
        '.git', 'node_modules', 'logs',
        '*.log', '.env', 'dist', 'build',
        '.vscode', '.idea', '*.tmp', '*.temp',
    })

    # Diffs with more changed lines than this are summarized, not displayed.
    MAX_DISPLAYED_DIFF_LINES = 100

    # Number of bytes read per iteration while hashing a file.
    _HASH_CHUNK_SIZE = 65536

    # Stylesheet embedded in the HTML report.
    _REPORT_CSS = """
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    line-height: 1.6;
    margin: 0;
    padding: 20px;
    background-color: #f5f5f5;
}
.container {
    max-width: 1200px;
    margin: 0 auto;
    background: white;
    padding: 30px;
    border-radius: 8px;
    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
    color: #333;
    border-bottom: 3px solid #007acc;
    padding-bottom: 10px;
    margin-bottom: 30px;
}
h2 {
    color: #555;
    margin-top: 30px;
    border-left: 4px solid #007acc;
    padding-left: 15px;
}
.summary {
    background: #f8f9fa;
    padding: 20px;
    border-radius: 5px;
    margin-bottom: 30px;
}
.summary-item {
    display: inline-block;
    margin: 10px 20px 10px 0;
    padding: 10px 15px;
    background: white;
    border-radius: 5px;
    border-left: 4px solid #007acc;
}
.file-list {
    background: #f8f9fa;
    padding: 15px;
    border-radius: 5px;
    margin: 15px 0;
}
.file-item {
    margin: 5px 0;
    padding: 8px;
    background: white;
    border-radius: 3px;
    font-family: monospace;
}
.diff-file {
    border-left: 4px solid #ffc107;
    background: #fff3cd;
}
.added-file {
    border-left: 4px solid #28a745;
    background: #d4edda;
}
.removed-file {
    border-left: 4px solid #dc3545;
    background: #f8d7da;
}
.diff-content {
    background: #f8f9fa;
    border: 1px solid #e9ecef;
    border-radius: 5px;
    padding: 0;
    margin: 10px 0;
    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
    font-size: 13px;
    max-height: 400px;
    overflow-y: auto;
    line-height: 1.4;
}
.diff-line {
    padding: 2px 10px;
    margin: 0;
    white-space: pre-wrap;
    border-left: 3px solid transparent;
}
.diff-line-added {
    background-color: #e6ffed;
    border-left-color: #28a745;
    color: #22863a;
}
.diff-line-added::before {
    content: "+";
    color: #28a745;
    font-weight: bold;
    margin-right: 8px;
}
.diff-line-removed {
    background-color: #ffeef0;
    border-left-color: #d73a49;
    color: #b31d28;
}
.diff-line-removed::before {
    content: "-";
    color: #d73a49;
    font-weight: bold;
    margin-right: 8px;
}
.diff-line-context {
    background-color: #f8f9fa;
    color: #586069;
}
.diff-line-context::before {
    content: " ";
    margin-right: 8px;
}
.diff-line-header {
    background-color: #f1f8ff;
    color: #0366d6;
    font-weight: bold;
    border-left-color: #0366d6;
}
.diff-line:hover {
    background-color: rgba(255, 255, 255, 0.1);
}
.meta {
    color: #666;
    font-size: 12px;
    margin-bottom: 10px;
}
.collapsible {
    cursor: pointer;
    user-select: none;
}
.collapsible:hover {
    background: #e9ecef;
}
.content {
    display: none;
}
.content.active {
    display: block;
}
.toggle {
    float: right;
    font-weight: bold;
}
"""

    # Script embedded in the HTML report: expands/collapses one diff entry.
    _REPORT_JS = """
function toggleContent(element) {
    const content = element.nextElementSibling;
    const toggle = element.querySelector('.toggle');
    if (content.classList.contains('active')) {
        content.classList.remove('active');
        toggle.textContent = '+';
    } else {
        content.classList.add('active');
        toggle.textContent = '-';
    }
}
"""

    def __init__(self, base: str, compare: str,
                 ignore_patterns: Optional[Set[str]] = None):
        """
        Initialize comparator with base and compare directories and ignore patterns.

        Args:
            base: Base directory path (reference)
            compare: Compare directory path (candidate)
            ignore_patterns: Set of additional patterns to ignore
                (combined with DEFAULT_IGNORE_PATTERNS)
        """
        self.base = Path(base).resolve()
        self.compare = Path(compare).resolve()
        # Always a fresh set, so the class-level defaults are never mutated.
        self.ignore_patterns = set(self.DEFAULT_IGNORE_PATTERNS)
        if ignore_patterns:
            self.ignore_patterns |= set(ignore_patterns)

    def _strip_root(self, path: Path) -> Path:
        """Return path relative to the deepest comparison root containing it.

        Paths outside both roots (including relative paths) are returned
        unchanged.
        """
        # Deepest root first, so a root nested inside the other one wins.
        roots = sorted((self.base, self.compare),
                       key=lambda root: len(root.parts), reverse=True)
        for root in roots:
            try:
                return path.relative_to(root)
            except ValueError:
                continue
        return path

    def should_ignore(self, path: Path) -> bool:
        """
        Check if a file/directory should be ignored based on patterns.

        Each pattern is matched case-sensitively, as a shell-style glob,
        against every individual component of the path ('logs' ignores
        'logs/app.txt' but not 'blogs/post.md'; '*.log' ignores any name
        ending in '.log'). A pattern containing '/' is additionally treated
        as a literal sub-path ('src/generated' ignores everything below that
        directory).

        Only the part of the path below the comparison root is examined, so
        the location of the roots themselves never triggers a match.

        Args:
            path: Path to check (root-relative, or absolute under a root)
        Returns:
            bool: True if path matches ignore pattern
        """
        relative = self._strip_root(Path(path))
        components = relative.parts
        posix_path = relative.as_posix()
        for pattern in self.ignore_patterns:
            if any(fnmatch.fnmatchcase(part, pattern) for part in components):
                return True
            if '/' in pattern:
                sub_path = pattern.strip('/')
                # Surrounding slashes force a match on whole components only.
                if sub_path and f"/{sub_path}/" in f"/{posix_path}/":
                    return True
        return False

    def get_file_hash(self, file_path: Path) -> str:
        """
        Calculate MD5 hash of a file.

        The hash is only used to detect content changes, not for security.

        Args:
            file_path: Path to file
        Returns:
            str: MD5 hex digest, or a message starting with "ERROR: " when
                the file cannot be read
        """
        digest = hashlib.md5()
        try:
            with open(file_path, "rb") as f:
                for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b""):
                    digest.update(chunk)
        except OSError as e:
            return f"ERROR: {str(e)}"
        return digest.hexdigest()

    def get_all_files(self, directory: Path) -> Dict[str, Path]:
        """
        Get all non-ignored files recursively in a directory.

        A read error aborts the walk; it is reported on stdout and the files
        collected so far are returned.

        Args:
            directory: Root directory path
        Returns:
            Dict mapping relative paths to absolute paths
        """
        files: Dict[str, Path] = {}
        try:
            for item in directory.rglob('*'):
                relative_path = item.relative_to(directory)
                # Judge the root-relative path only: the directories leading
                # to the root must not influence what is ignored.
                if self.should_ignore(relative_path) or not item.is_file():
                    continue
                files[str(relative_path)] = item
        except OSError as e:
            print(f"Error reading directory {directory}: {e}")
        return files

    @staticmethod
    def _diff_label(root: Path, file_path: Path) -> str:
        """Build the '<root name>/<relative path>' label used in diff headers."""
        file_path = Path(file_path)
        try:
            relative = file_path.relative_to(root)
        except ValueError:
            # File lives outside the root: fall back to its bare name.
            relative = Path(file_path.name)
        return f"{root.name}/{relative}"

    def compare_files_content(self, file1: Path, file2: Path) -> Tuple[bool, str]:
        """
        Compare content of two files.

        Files with equal MD5 hashes are identical. Otherwise both are decoded
        as UTF-8 text and diffed; files that cannot be decoded are reported
        as differing binaries.

        Args:
            file1: First file path (from the base tree)
            file2: Second file path (from the compare tree)
        Returns:
            Tuple of (is_same, diff_info); diff_info is a unified diff, one
            entry per line without blank separator lines, or a status message
        """
        hash1 = self.get_file_hash(file1)
        hash2 = self.get_file_hash(file2)
        if hash1.startswith("ERROR") or hash2.startswith("ERROR"):
            return False, f"Error reading files: {hash1}, {hash2}"
        if hash1 == hash2:
            return True, "Files are identical"

        try:
            with open(file1, 'r', encoding='utf-8') as f1, \
                    open(file2, 'r', encoding='utf-8') as f2:
                lines1 = f1.readlines()
                lines2 = f2.readlines()
        except UnicodeDecodeError:
            return False, "Binary files differ"
        except OSError as e:
            return False, f"Error comparing files: {str(e)}"

        # Content lines still carry their '\n'; strip it so that joining with
        # '\n' below does not insert an empty line after every diff line.
        diff = [
            line.rstrip('\n')
            for line in difflib.unified_diff(
                lines1, lines2,
                fromfile=self._diff_label(self.base, file1),
                tofile=self._diff_label(self.compare, file2),
                lineterm='',
            )
        ]
        if diff:
            return False, '\n'.join(diff)
        # Bytes differ but the decoded lines do not (e.g. line endings only).
        return True, "Files are identical"

    def _get_comparison_data(self) -> Tuple[List[str], List[str], List[Dict], List[str]]:
        """
        Get comparison data for report generation.

        Progress information is printed to stdout while the trees are walked.

        Returns:
            Tuple containing (only_in_base, only_in_compare, different_files,
            identical_files); different_files holds dicts with the keys
            'path', 'file1', 'file2' and 'diff'
        """
        print("Starting directory comparison...")
        print(f"Base: {self.base}")
        print(f"Compare: {self.compare}\n")

        files1 = self.get_all_files(self.base)
        files2 = self.get_all_files(self.compare)
        all_files = set(files1) | set(files2)

        only_in_base: List[str] = []
        only_in_compare: List[str] = []
        different_files: List[Dict] = []
        identical_files: List[str] = []

        print(f"Total unique files found: {len(all_files)}")
        print("Processing...\n")

        for relative_path in sorted(all_files):
            file1 = files1.get(relative_path)
            file2 = files2.get(relative_path)
            if file1 is None:
                only_in_compare.append(relative_path)
            elif file2 is None:
                only_in_base.append(relative_path)
            else:
                is_same, diff_info = self.compare_files_content(file1, file2)
                if is_same:
                    identical_files.append(relative_path)
                else:
                    different_files.append({
                        'path': relative_path,
                        'file1': file1,
                        'file2': file2,
                        'diff': diff_info,
                    })
        return only_in_base, only_in_compare, different_files, identical_files

    def _count_diff_lines(self, diff_content: str) -> int:
        """Count meaningful diff lines (added/removed, excluding headers)."""
        return sum(
            1
            for line in diff_content.split('\n')
            if line.startswith(('+', '-')) and not line.startswith(('+++', '---'))
        )

    def _format_diff_for_html(self, diff_content: str) -> str:
        """Format diff content for HTML with syntax highlighting.

        Every line becomes one <div>; the leading '+', '-' or ' ' marker is
        dropped because the stylesheet re-adds it. All text is HTML-escaped
        so file content can never be interpreted as markup.
        """
        marker_classes = (
            ('+', 'diff-line-added'),
            ('-', 'diff-line-removed'),
            (' ', 'diff-line-context'),
        )
        formatted_lines = []
        for line in diff_content.split('\n'):
            if not line.strip():
                continue
            if line.startswith(('+++', '---', '@@')):
                css_class, text = 'diff-line-header', line
            else:
                # Lines without a diff marker (e.g. status messages) are
                # shown in full as context.
                css_class, text = 'diff-line-context', line
                for marker, marker_class in marker_classes:
                    if line.startswith(marker):
                        css_class, text = marker_class, line[1:]
                        break
            escaped = html.escape(text, quote=False)
            formatted_lines.append(f'<div class="diff-line {css_class}">{escaped}</div>')
        return ''.join(formatted_lines)

    def generate_text_report(self) -> str:
        """
        Generate clean text comparison report.

        Runs the comparison, then lists summary counts, files present on one
        side only and, for each differing file, its changed lines (context
        lines are omitted; long diffs are replaced by a line count).

        Returns:
            str: Formatted report text
        """
        only_in_base, only_in_compare, different_files, identical_files = \
            self._get_comparison_data()

        report = [
            "=" * 80,
            "DIRECTORY COMPARISON REPORT",
            "=" * 80,
            f"Date/Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"Base: {self.base}",
            f"Compare: {self.compare}",
            "",
            "SUMMARY:",
            f" Identical files: {len(identical_files)}",
            f" Different files: {len(different_files)}",
            f" Files only in base: {len(only_in_base)}",
            f" Files only in compare: {len(only_in_compare)}",
            "",
        ]

        for heading, paths in (
            ("FILES ONLY IN BASE:", only_in_base),
            ("FILES ONLY IN COMPARE:", only_in_compare),
        ):
            if paths:
                report.append(heading)
                report.append("-" * 50)
                report.extend(f" {file_path}" for file_path in sorted(paths))
                report.append("")

        if different_files:
            report.append("DIFFERENT FILES:")
            report.append("-" * 50)
            for file_info in different_files:
                report.append(f"File: {file_info['path']}")
                report.append(f" base: {file_info['file1']}")
                report.append(f" compare: {file_info['file2']}")
                diff_lines_count = self._count_diff_lines(file_info['diff'])
                if diff_lines_count <= self.MAX_DISPLAYED_DIFF_LINES:
                    report.append(" Differences:")
                    # Keep headers, changes and status messages; drop blank
                    # lines and unchanged context (lines starting with ' ').
                    report.extend(
                        f" {line}"
                        for line in file_info['diff'].split('\n')
                        if line.strip() and not line.startswith(' ')
                    )
                else:
                    report.append(
                        f" Differences: {diff_lines_count} lines (too long to display)"
                    )
                report.append("")

        report.append("=" * 80)
        return '\n'.join(report)

    @staticmethod
    def _html_file_list(heading: str, css_class: str, paths: List[str]) -> str:
        """Render one titled list of file paths for the HTML report."""
        items = ''.join(
            f'<div class="file-item {css_class}">{html.escape(str(file_path))}</div>'
            for file_path in sorted(paths)
        )
        return f'\n<h2>{heading}</h2>\n<div class="file-list">{items}</div>'

    def _html_diff_entry(self, file_info: Dict) -> str:
        """Render the collapsible HTML entry for one differing file."""
        diff_lines_count = self._count_diff_lines(file_info['diff'])
        entry = [
            '\n<div class="file-item diff-file">\n'
            '<div class="collapsible" onclick="toggleContent(this)">\n'
            f"<strong>{html.escape(str(file_info['path']))}</strong>\n"
            '<span class="toggle">+</span>\n'
            '</div>\n'
            '<div class="content">\n'
            '<div class="meta">\n'
            f"Base: {html.escape(str(file_info['file1']))}<br>\n"
            f"Compare: {html.escape(str(file_info['file2']))}<br>\n"
            f"Changes: {diff_lines_count} lines\n"
            '</div>'
        ]
        if diff_lines_count <= self.MAX_DISPLAYED_DIFF_LINES:
            formatted_diff = self._format_diff_for_html(file_info['diff'])
            if formatted_diff:
                entry.append(f'<div class="diff-content">{formatted_diff}</div>')
        else:
            entry.append(
                '<div class="diff-content"><div class="diff-line diff-line-context">'
                f'Differences too long to display ({diff_lines_count} lines)'
                '</div></div>'
            )
        entry.append("</div></div>")
        return ''.join(entry)

    def generate_html_report(self) -> str:
        """
        Generate HTML comparison report.

        Runs the comparison and renders a self-contained page (inline CSS and
        script) with summary counts, one-sided files and a collapsible diff
        per differing file. All paths and file contents are HTML-escaped.

        Returns:
            str: HTML formatted report
        """
        only_in_base, only_in_compare, different_files, identical_files = \
            self._get_comparison_data()

        generated_on = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        summary_items = ''.join(
            f'<div class="summary-item">\n<strong>{count}</strong><br>{label}\n</div>\n'
            for count, label in (
                (len(identical_files), "Identical files"),
                (len(different_files), "Different files"),
                (len(only_in_base), "Only in base"),
                (len(only_in_compare), "Only in compare"),
            )
        )

        sections = [
            '<html>\n'
            '<head>\n'
            '<meta charset="UTF-8">\n'
            '<title>Directory Comparison Report</title>\n'
            f'<style>{self._REPORT_CSS}</style>\n'
            f'<script>{self._REPORT_JS}</script>\n'
            '</head>\n'
            '<body>\n'
            '<div class="container">\n'
            '<h1>Directory Comparison Report</h1>\n'
            '<div class="meta">\n'
            f'Generated on: {generated_on}<br>\n'
            f'Base: <code>{html.escape(str(self.base))}</code><br>\n'
            f'Compare: <code>{html.escape(str(self.compare))}</code>\n'
            '</div>\n'
            '<div class="summary">\n'
            '<h2>Summary</h2>\n'
            f'{summary_items}'
            '</div>'
        ]

        if only_in_base:
            sections.append(
                self._html_file_list("Files Only in Base", "removed-file", only_in_base)
            )
        if only_in_compare:
            sections.append(
                self._html_file_list("Files Only in Compare", "added-file", only_in_compare)
            )
        if different_files:
            sections.append('\n<h2>Different Files</h2>\n<div class="file-list">')
            sections.extend(self._html_diff_entry(info) for info in different_files)
            sections.append("</div>")

        sections.append('\n</div>\n</body>\n</html>')
        return ''.join(sections)
def main():
    """Main entry point for the script.

    Parses the command line, validates both directories and produces the
    requested report:

    * text format: written to '<output>.txt' when -o is given, otherwise
      printed to stdout;
    * html format: written to '<output>.html', or 'comparison_report.html'
      when -o is omitted.

    Exits with status 1 when either directory is missing or not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Recursively compare all files between two directories"
    )
    parser.add_argument("base", help="Base directory (reference, e.g. main branch)")
    parser.add_argument("compare", help="Compare directory (candidate, e.g. dev branch)")
    parser.add_argument("-o", "--output", help="Output file for report (without extension)")
    parser.add_argument("--format", choices=["text", "html"], default="text",
                        help="Output format: text (default) or html")
    parser.add_argument("--ignore", nargs="*",
                        help="Additional patterns to ignore (combined with defaults)")
    args = parser.parse_args()

    for directory in (args.base, args.compare):
        if not os.path.exists(directory):
            print(f"Error: Directory '{directory}' does not exist", file=sys.stderr)
            sys.exit(1)
        if not os.path.isdir(directory):
            # A regular file would otherwise be "compared" as an empty tree.
            print(f"Error: '{directory}' is not a directory", file=sys.stderr)
            sys.exit(1)

    ignore_patterns = set(args.ignore) if args.ignore else None
    comparator = DirectoryComparator(args.base, args.compare, ignore_patterns)

    if args.format == "text":
        text_report = comparator.generate_text_report()
        if args.output:
            text_file = f"{args.output}.txt"
            with open(text_file, 'w', encoding='utf-8') as f:
                f.write(text_report)
            print(f"Text report saved to: {text_file}")
        else:
            # Without -o the report has no other destination: show it.
            print(text_report)
    else:
        html_report = comparator.generate_html_report()
        html_file = f"{args.output or 'comparison_report'}.html"
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(html_report)
        print(f"HTML report saved to: {html_file}")
        # as_uri() yields a valid file:// URL on every platform and
        # percent-encodes spaces and other special characters.
        print(f"Open in browser: {Path(html_file).resolve().as_uri()}")


if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Created to resolve divergent Git branch histories where traditional diff tools weren't sufficient. This script helps identify all code differences between directories/branches, making it easier to reconcile changes and recreate clean branch structures.
Generates detailed reports (text/HTML) for systematic review and application of differences.