Created
June 21, 2025 22:12
-
-
Save Klaudioz/aa0599ebebbe3a1c04171fb6a43c750d to your computer and use it in GitHub Desktop.
A standalone Python script that replicates the core functionality of the gitingest tool.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| gitingest.py - Turn any Git repository into a prompt-friendly text and copy to clipboard. | |
| Usage: | |
| python gitingest.py <URL/LOCAL_FOLDER> [options] | |
| Options: | |
| --exclude PATTERN Patterns to exclude (can be used multiple times) | |
| --max-size SIZE Include files under SIZE kb (default: 50kb) | |
| Examples: | |
| python gitingest.py https://github.com/user/repo | |
| python gitingest.py ./local-folder --exclude "*.log" --exclude "tests/*" | |
| python gitingest.py https://github.com/user/repo --max-size 100 | |
| """ | |
| import os | |
| import sys | |
| import platform | |
| import subprocess | |
| import tempfile | |
| import shutil | |
| import argparse | |
| from pathlib import Path | |
| from typing import Set, Tuple, Optional | |
| import re | |
| from fnmatch import fnmatch | |
| import uuid | |
# Patterns that are skipped unless the caller overrides them (taken from gitingest).
# Built from one tuple per ecosystem so each group can be read on its own.
DEFAULT_IGNORE_PATTERNS = set().union(
    # Python
    (
        "*.pyc", "*.pyo", "*.pyd", "__pycache__", ".pytest_cache",
        ".coverage", ".tox", ".nox", ".mypy_cache", ".ruff_cache",
        ".hypothesis", "poetry.lock", "Pipfile.lock",
    ),
    # JavaScript / Node
    (
        "node_modules", "bower_components", "package-lock.json", "yarn.lock",
        ".npm", ".yarn", ".pnpm-store", "bun.lock", "bun.lockb",
    ),
    # Java (".project" is shared with IDE metadata)
    (
        "*.class", "*.jar", "*.war", "*.ear", "*.nar", ".gradle/", "build/",
        ".settings/", ".classpath", "gradle-app.setting", "*.gradle",
        ".project",
    ),
    # C / C++
    (
        "*.o", "*.obj", "*.dll", "*.dylib", "*.exe", "*.lib", "*.out", "*.a",
        "*.pdb",
    ),
    # Swift / Xcode
    (
        ".build/", "*.xcodeproj/", "*.xcworkspace/", "*.pbxuser", "*.mode1v3",
        "*.mode2v3", "*.perspectivev3", "*.xcuserstate", "xcuserdata/",
        ".swiftpm/",
    ),
    # Ruby
    (
        "*.gem", ".bundle/", "vendor/bundle", "Gemfile.lock", ".ruby-version",
        ".ruby-gemset", ".rvmrc",
    ),
    # Rust, plus the "target/" directory it shares with Java
    ("Cargo.lock", "**/*.rs.bk", "target/"),
    # Go and .NET / C# ("bin/" is common to both)
    (
        "pkg/", "bin/", "obj/", "*.suo", "*.user", "*.userosscache",
        "*.sln.docstates", "packages/", "*.nupkg",
    ),
    # Version control
    (".git", ".svn", ".hg", ".gitignore", ".gitattributes", ".gitmodules"),
    # Images and media
    (
        "*.svg", "*.png", "*.jpg", "*.jpeg", "*.gif", "*.ico", "*.pdf",
        "*.mov", "*.mp4", "*.mp3", "*.wav",
    ),
    # Virtual environments
    ("venv", ".venv", "env", ".env", "virtualenv"),
    # IDEs and editors
    (".idea", ".vscode", ".vs", "*.swo", "*.swn", ".settings", "*.sublime-*"),
    # Temporary and cache files
    (
        "*.log", "*.bak", "*.swp", "*.tmp", "*.temp", ".cache", ".sass-cache",
        ".eslintcache", ".DS_Store", "Thumbs.db", "desktop.ini",
    ),
    # Build directories and artifacts
    ("build", "dist", "target", "out", "*.egg-info", "*.egg", "*.whl", "*.so"),
    # Documentation tooling and installed packages
    ("site-packages", ".docusaurus", ".next", ".nuxt"),
    # Other common patterns
    ("*.min.js", "*.min.css", "*.map", ".terraform", "*.tfstate*", "vendor/"),
    # Gitingest's own output file
    ("digest.txt",),
)

# Horizontal rule printed above and below each "FILE: ..." header.
SEPARATOR = 48 * "="
def copy_to_clipboard(text: str) -> bool:
    """Copy ``text`` to the system clipboard using a platform command-line tool.

    The tool is chosen from ``platform.system()``: ``pbcopy`` on macOS,
    ``clip`` on Windows, and ``xclip`` then ``xsel`` everywhere else. On the
    last branch the next tool is tried when the previous one is missing *or*
    exits with a non-zero status.

    Args:
        text: The text to place on the clipboard.

    Returns:
        True if one of the tools exited with status 0, False otherwise.
        Failures are reported with ``print`` and never raised.
    """
    system = platform.system()
    if system == "Darwin":  # macOS
        candidates = [(['pbcopy'], 'utf-8')]
    elif system == "Windows":
        # clip.exe does not treat piped bytes as UTF-8; UTF-16 (with BOM) keeps
        # non-ASCII characters intact. No shell is needed to launch it.
        candidates = [(['clip'], 'utf-16')]
    else:  # Linux/Unix
        candidates = [
            (['xclip', '-selection', 'clipboard'], 'utf-8'),
            (['xsel', '--clipboard', '--input'], 'utf-8'),
        ]

    missing = []
    for command, encoding in candidates:
        try:
            # Only stdin is piped: stdout/stderr stay inherited so a tool that
            # forks into the background cannot keep a captured pipe open.
            completed = subprocess.run(command, input=text.encode(encoding))
        except FileNotFoundError:
            missing.append(command[0])
            continue
        except Exception as e:
            print(f"Error copying to clipboard: {e}")
            continue
        if completed.returncode == 0:
            return True

    if len(missing) == len(candidates):
        # None of the candidate tools is installed.
        if system in ("Darwin", "Windows"):
            print(f"Error copying to clipboard: '{missing[0]}' command not found")
        else:
            print("Error: Neither xclip nor xsel found. Please install one of them:")
            print("  sudo apt-get install xclip  # or")
            print("  sudo apt-get install xsel")
    return False
def estimate_tokens(text: str) -> str:
    """Return a short, human-readable token estimate for ``text``.

    Uses the rough rule of one token per four characters (the real gitingest
    tool uses tiktoken instead).

    Args:
        text: The text whose size is being estimated.

    Returns:
        The plain integer below 1,000 tokens, otherwise a one-decimal value
        suffixed with ``k`` (thousands) or ``M`` (millions).
    """
    token_estimate = len(text) // 4
    if token_estimate < 1_000:
        return str(token_estimate)

    # Round before choosing the unit: values just under one million would
    # otherwise be rendered as "1000.0k" instead of "1.0M".
    thousands = round(token_estimate / 1_000, 1)
    if thousands < 1_000:
        return f"{thousands:.1f}k"
    return f"{token_estimate / 1_000_000:.1f}M"
def is_text_file(path: Path) -> bool:
    """Guess whether ``path`` holds UTF-8 text by sampling its first bytes.

    Args:
        path: File to inspect.

    Returns:
        True for an empty file or when the sampled bytes contain no NUL byte
        and decode as UTF-8; False otherwise, including when the file cannot
        be opened or read.
    """
    import codecs

    sample_size = 1024
    try:
        with path.open('rb') as f:
            chunk = f.read(sample_size)
    except OSError:
        return False

    if not chunk:
        return True
    # A NUL byte is the binary marker; other invalid bytes such as 0xFF are
    # rejected by the UTF-8 decoder below.
    if b'\x00' in chunk:
        return False

    # The sample may end in the middle of a multi-byte character. Only treat a
    # dangling sequence as an error when the sample reached the end of the file.
    reached_eof = len(chunk) < sample_size
    decoder = codecs.getincrementaldecoder('utf-8')()
    try:
        decoder.decode(chunk, final=reached_eof)
    except UnicodeDecodeError:
        return False
    return True
def should_exclude(path: Path, base_path: Path, ignore_patterns: Set[str]) -> bool:
    """Decide whether ``path`` is excluded by any of ``ignore_patterns``.

    Each pattern is an ``fnmatch`` glob tested against the path relative to
    ``base_path`` (with ``/`` separators), against the entry's own name, and
    against the name of every directory above it. A pattern ending in ``/``
    only applies to directories. Empty patterns are ignored.

    Args:
        path: Candidate file or directory.
        base_path: Root that relative paths are computed from.
        ignore_patterns: Glob patterns to test.

    Returns:
        True if the path matches a pattern or does not lie under
        ``base_path``; False otherwise.
    """
    try:
        rel_path = path.relative_to(base_path)
    except ValueError:
        return True

    # Forward slashes on every platform, so "tests/*" also works on Windows.
    rel_str = rel_path.as_posix()
    parts = rel_path.parts
    leaf = parts[-1] if parts else None
    ancestors = parts[:-1]

    for pattern in ignore_patterns:
        bare = pattern.rstrip("/")
        if not bare:
            continue
        # Anything above the entry is necessarily a directory, so both plain
        # and directory-only patterns apply to it.
        if any(fnmatch(name, bare) for name in ancestors):
            return True
        matched = fnmatch(rel_str, bare) or (leaf is not None and fnmatch(leaf, bare))
        if not matched:
            continue
        directory_only = bare != pattern
        if not directory_only or path.is_dir():
            return True
    return False
def clone_repo(url: str, target_path: Path) -> bool:
    """Shallow-clone ``url`` into ``target_path`` with the ``git`` executable.

    Args:
        url: Repository location handed to ``git clone``.
        target_path: Destination directory for the clone.

    Returns:
        True when ``git clone --depth=1`` exits with status 0. False when it
        exits with any other status (git's stderr is printed) or when the
        ``git`` executable cannot be found.
    """
    command = ['git', 'clone', '--depth=1', url, str(target_path)]
    try:
        outcome = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        print("Error: git is not installed. Please install git first.")
        return False

    if outcome.returncode != 0:
        print(f"Error cloning repository: {outcome.stderr}")
        return False
    return True
def process_directory(path: Path, max_file_size: int, ignore_patterns: Set[str]) -> Tuple[str, str, int]:
    """Walk ``path`` and build the tree listing plus the concatenated file contents.

    Entries rejected by ``should_exclude`` are omitted entirely. Every other
    entry appears in the tree; a file's content is included only when its size
    is at most ``max_file_size`` bytes and ``is_text_file`` accepts it.

    Args:
        path: Root directory to process.
        max_file_size: Largest file size, in bytes, whose content is included.
        ignore_patterns: Glob patterns passed to ``should_exclude``.

    Returns:
        A ``(tree, content, file_count)`` tuple: the newline-joined tree, the
        newline-joined file sections, and the number of files whose content
        section was emitted.
    """
    tree_lines = [f"{path.name}/"]
    content_lines = []
    file_count = 0

    def add_file(file_path: Path) -> None:
        """Append one file's content section if it is small enough and textual."""
        nonlocal file_count
        if file_path.stat().st_size > max_file_size:
            return
        if not is_text_file(file_path):
            return
        file_count += 1
        # Resolved before reading so the error section below can always name
        # the file, even when read_text() is what fails.
        relative_path = file_path.relative_to(path)
        try:
            body = file_path.read_text(encoding='utf-8')
        except Exception as e:
            body = f"[Error reading file: {e}]"
        content_lines.extend([
            "",
            SEPARATOR,
            f"FILE: {relative_path}",
            SEPARATOR,
            body,
        ])

    def build_tree(current_path: Path, prefix: str = "", is_last: bool = True) -> None:
        """Record ``current_path`` in the tree and recurse into directories."""
        if should_exclude(current_path, path, ignore_patterns):
            return

        is_directory = current_path.is_dir()
        if current_path != path:  # The root line is emitted up front.
            connector = "└── " if is_last else "├── "
            label = current_path.name + ("/" if is_directory else "")
            tree_lines.append(f"{prefix}{connector}{label}")

        if is_directory:
            # Directories first, then files, each group sorted case-insensitively.
            items = sorted(current_path.iterdir(), key=lambda x: (x.is_file(), x.name.lower()))
            # Filter before recursing so "is last child" reflects visible entries.
            items = [item for item in items if not should_exclude(item, path, ignore_patterns)]
            # Four columns wide so children line up under the connector above.
            new_prefix = prefix + ("    " if is_last else "│   ")
            for i, item in enumerate(items):
                build_tree(item, new_prefix, i == len(items) - 1)
        elif current_path.is_file():
            add_file(current_path)

    build_tree(path)

    tree = "\n".join(tree_lines)
    content = "\n".join(content_lines)
    return tree, content, file_count
def parse_url(url: str) -> Tuple[str, str]:
    """Split an HTTP(S) repository URL into ``(user, repo)``.

    Trailing slashes and a trailing ``.git`` are dropped first; the user and
    repository are then the last two ``/``-separated segments.

    Args:
        url: Repository URL.

    Returns:
        The ``(user, repo)`` pair.

    Raises:
        ValueError: If the URL does not start with ``http://`` or
            ``https://``, or has fewer than two segments after the host.
    """
    cleaned = url.rstrip('/').removesuffix('.git')

    if not cleaned.startswith(('http://', 'https://')):
        raise ValueError("Invalid repository URL")

    # "https://host/user/repo" splits into ['https:', '', 'host', 'user', 'repo'].
    segments = cleaned.split('/')
    if len(segments) < 5:
        raise ValueError("Invalid repository URL")

    return segments[-2], segments[-1]
def main():
    """Command-line entry point.

    Parses the arguments, clones the repository into a temporary directory
    when the source is an HTTP(S) URL (otherwise uses the local directory),
    builds the digest with ``process_directory``, prints a summary and copies
    the digest to the clipboard. If the copy fails, the digest is written to
    ``gitingest_output.txt`` in the current directory instead.

    Exits with status 1 when cloning fails or the local path is missing or
    not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Turn any Git repository into a prompt-friendly text and copy to clipboard.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:
  python gitingest.py https://github.com/user/repo
  python gitingest.py ./local-folder --exclude "*.log" --exclude "tests/*"
  python gitingest.py https://github.com/user/repo --max-size 100""")
    parser.add_argument('source', help='Repository URL or local folder path')
    parser.add_argument('--exclude', action='append', default=[],
                        help='Patterns to exclude (can be used multiple times)')
    parser.add_argument('--max-size', type=int, default=50,
                        help='Include files under SIZE kb (default: 50kb)')
    args = parser.parse_args()

    # Convert max-size from kb to bytes
    max_file_size = args.max_size * 1024

    # Defaults plus user-supplied patterns; the union leaves the defaults untouched.
    ignore_patterns = DEFAULT_IGNORE_PATTERNS | set(args.exclude)

    # Determine if source is URL or local path
    is_url = args.source.startswith(('http://', 'https://'))
    temp_dir = None
    try:
        if is_url:
            # Clone repository to temporary directory
            print(f"Cloning repository: {args.source}")
            temp_dir = tempfile.mkdtemp(prefix='gitingest_')
            # Name the clone folder after the repository when the URL parses;
            # otherwise fall back to a generic folder and show the raw URL.
            try:
                user, repo = parse_url(args.source)
            except ValueError:
                repo_dir = Path(temp_dir) / "repo"
                repo_name = args.source
            else:
                repo_dir = Path(temp_dir) / f"{user}-{repo}"
                repo_name = f"{user}/{repo}"
            if not clone_repo(args.source, repo_dir):
                sys.exit(1)
            source_path = repo_dir
        else:
            # Use local directory
            source_path = Path(args.source).resolve()
            if not source_path.exists():
                print(f"Error: Path '{args.source}' does not exist")
                sys.exit(1)
            if not source_path.is_dir():
                print(f"Error: '{args.source}' is not a directory")
                sys.exit(1)
            repo_name = source_path.name

        print(f"Processing: {repo_name}")
        print(f"Max file size: {args.max_size}kb")
        if args.exclude:
            print(f"Additional exclude patterns: {', '.join(args.exclude)}")

        # Process the directory
        tree, content, file_count = process_directory(source_path, max_file_size, ignore_patterns)

        # Prepare the output
        full_content = f"Directory structure:\n{tree}\n\n\nFiles Content:\n{content}"

        # Estimate tokens
        token_estimate = estimate_tokens(full_content)

        # Create summary
        summary = "\n".join([
            f"Repository: {repo_name}",
            f"Files analyzed: {file_count}",
            f"Estimated tokens: {token_estimate}",
        ])
        print("\n" + summary)

        # Copy to clipboard
        print("\nCopying to clipboard...")
        if copy_to_clipboard(full_content):
            print("✓ Content copied to clipboard successfully!")
        else:
            print("✗ Failed to copy to clipboard")
            # Save to a file so the digest is not lost
            fallback_file = Path("gitingest_output.txt")
            fallback_file.write_text(full_content, encoding='utf-8')
            print(f"Content saved to: {fallback_file}")
    finally:
        # Best-effort cleanup: failing to delete the scratch clone must not
        # replace the real exit status or exception.
        if temp_dir and Path(temp_dir).exists():
            shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment