Script that searches a folder for potentially suspicious files and builds an inventory CSV for further analysis.
#!/usr/bin/env python3
"""
File Inventory Generator for Forensic Analysis

Scans folders recursively, identifies potentially malicious files,
and generates an inventory CSV for further analysis.
"""
import os
import sys
import csv
import mimetypes
import subprocess
import zipfile

# Potentially dangerous executable extensions
EXECUTABLE_EXTENSIONS = {
    # Windows executables
    '.exe', '.dll', '.sys', '.drv', '.ocx', '.scr', '.cpl',
    # Scripts, installers, and other executable content
    '.bat', '.cmd', '.com', '.ps1', '.vbs', '.vbe', '.js', '.jse',
    '.wsf', '.wsh', '.msi', '.msp', '.hta', '.jar',
    # Office documents with macros
    '.xlsm', '.xlam', '.xltm', '.docm', '.dotm', '.pptm', '.potm', '.ppam', '.ppsm',
    # Linux/Unix executables
    '.sh', '.bin', '.run', '.elf', '.so',
    # Other potentially dangerous formats
    '.app', '.deb', '.rpm', '.dmg', '.pkg', '.apk'
}

# Configuration files that could be abused
CONFIG_EXTENSIONS = {
    '.ini', '.conf', '.config', '.cfg', '.xml', '.json', '.yaml', '.yml',
    '.reg', '.plist', '.toml', '.properties'
}

# Document extensions (for extension/content mismatch detection)
DOCUMENT_EXTENSIONS = {
    '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
    '.odt', '.ods', '.odp', '.rtf'
}

# Archive extensions to extract and scan
ARCHIVE_EXTENSIONS = {
    '.zip'
}


def get_mime_by_extension(file_path):
    """Get MIME type based on file extension"""
    mime_type, _ = mimetypes.guess_type(file_path)
    return mime_type or "unknown"


def get_actual_mime_type(file_path):
    """Get actual MIME type using the file command"""
    try:
        result = subprocess.run(
            ['file', '--mime-type', '-b', file_path],
            capture_output=True,
            text=True,
            timeout=5
        )
        return result.stdout.strip()
    except Exception as e:
        return f"error: {e}"


def human_readable_size(size_bytes):
    """Convert bytes to human-readable format"""
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if size_bytes < 1024.0:
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024.0
    return f"{size_bytes:.2f} PB"


def should_include_file(file_path, ext, mime_by_ext, actual_mime):
    """Determine if file should be included in inventory"""
    ext_lower = ext.lower()

    # Include executables
    if ext_lower in EXECUTABLE_EXTENSIONS:
        return True

    # Include config files
    if ext_lower in CONFIG_EXTENSIONS:
        return True

    # Include documents with mismatched MIME types
    if ext_lower in DOCUMENT_EXTENSIONS:
        # Check if actual MIME doesn't match expected document type
        if 'application/zip' in actual_mime or 'application/x-' in actual_mime:
            return True  # Suspicious mismatch
        if 'text/' in actual_mime and ext_lower not in {'.txt', '.rtf'}:
            return True  # Document extension but plain-text content

    # Include files with executable MIME types regardless of extension
    if any(x in actual_mime.lower() for x in [
        'application/x-executable',
        'application/x-sharedlib',
        'application/x-mach-binary',
        'application/x-dosexec',
        'application/x-msdownload'
    ]):
        return True

    # Include scripts
    if any(x in actual_mime.lower() for x in [
        'text/x-shellscript',
        'text/x-python',
        'text/x-perl',
        'application/x-javascript'
    ]):
        return True

    return False


def extract_zip_in_place(zip_path):
    """Extract ZIP file to a folder with the same name"""
    try:
        # Create extraction folder
        extract_folder = zip_path.rsplit('.', 1)[0]
        os.makedirs(extract_folder, exist_ok=True)

        # Extract
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_folder)

        print(f"[+] Extracted: {zip_path} -> {extract_folder}")
        return extract_folder
    except Exception as e:
        print(f"[!] Failed to extract {zip_path}: {e}")
        return None


def scan_folder(input_folder, output_folder):
    """Main scanning function"""
    print(f"[*] Starting inventory scan of: {input_folder}")
    print(f"[*] Output folder: {output_folder}")

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Output CSV
    inventory_csv = os.path.join(output_folder, "inventory.csv")

    # CSV columns
    fieldnames = [
        'File',
        'Type by Extension',
        'Actual Type',
        'Size'
    ]

    # Track processed files to avoid duplicates
    processed_files = set()
    inventory_rows = []

    # Queue of folders to process (for handling extracted ZIPs)
    folders_to_scan = [input_folder]

    while folders_to_scan:
        current_folder = folders_to_scan.pop(0)
        print(f"\n[*] Scanning: {current_folder}")

        for root, dirs, files in os.walk(current_folder):
            for filename in files:
                file_path = os.path.join(root, filename)

                # Skip if already processed
                if file_path in processed_files:
                    continue
                processed_files.add(file_path)

                # Get file info
                try:
                    file_size = os.path.getsize(file_path)
                    ext = os.path.splitext(filename)[1]

                    # Handle ZIP files - extract and queue for scanning
                    if ext.lower() in ARCHIVE_EXTENSIONS:
                        print(f"[*] Found archive: {file_path}")
                        extracted_folder = extract_zip_in_place(file_path)
                        if extracted_folder:
                            folders_to_scan.append(extracted_folder)
                        continue  # Don't add ZIP to inventory, scan contents instead

                    # Get MIME types
                    mime_by_ext = get_mime_by_extension(file_path)
                    actual_mime = get_actual_mime_type(file_path)

                    # Check if file should be included
                    if should_include_file(file_path, ext, mime_by_ext, actual_mime):
                        row = {
                            'File': file_path,
                            'Type by Extension': mime_by_ext,
                            'Actual Type': actual_mime,
                            'Size': human_readable_size(file_size)
                        }
                        inventory_rows.append(row)
                        print(f"  [+] Added: {filename} ({actual_mime})")
                except Exception as e:
                    print(f"  [!] Error processing {file_path}: {e}")

    # Write inventory CSV (explicit UTF-8 so unusual file names don't break the write)
    with open(inventory_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(inventory_rows)

    # Summary
    print(f"\n{'='*60}")
    print("[+] INVENTORY COMPLETE")
    print(f"{'='*60}")
    print(f"Total files in inventory: {len(inventory_rows)}")
    print(f"Inventory saved to: {inventory_csv}")
    print(f"{'='*60}")


def main():
    """Main entry point"""
    if len(sys.argv) != 3:
        print("Usage: make-inventory <input_folder> <output_folder>")
        print("\nExample:")
        print("  make-inventory /path/to/scan /path/to/results")
        sys.exit(1)

    input_folder = sys.argv[1]
    output_folder = sys.argv[2]

    # Validate input folder exists
    if not os.path.isdir(input_folder):
        print(f"Error: Input folder does not exist: {input_folder}")
        sys.exit(1)

    # Check if 'file' command is available
    try:
        subprocess.run(['file', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: 'file' command not found. Please install it:")
        print("  Ubuntu/Debian: sudo apt-get install file")
        print("  Fedora/RHEL:   sudo dnf install file")
        sys.exit(1)

    scan_folder(input_folder, output_folder)


if __name__ == "__main__":
    main()
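
A typical run, assuming the script is saved as make-inventory (the name its own usage message uses) and marked executable; the input and output paths below are placeholders:

    chmod +x make-inventory
    ./make-inventory /path/to/scan /path/to/results

The resulting inventory.csv in the output folder lists each flagged file's path, the MIME type implied by its extension, the MIME type reported by the file command, and a human-readable size.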