Skip to content

Instantly share code, notes, and snippets.

@manchicken
Created October 31, 2025 11:25
Show Gist options
  • Select an option

  • Save manchicken/d57a3ee42a41e804348fc71ebcceab06 to your computer and use it in GitHub Desktop.

Select an option

Save manchicken/d57a3ee42a41e804348fc71ebcceab06 to your computer and use it in GitHub Desktop.
Script that searches a folder for potentially malicious files and builds an inventory of the files to scan
#!/usr/bin/env python3
"""
File Inventory Generator for Forensic Analysis
Scans folders recursively, identifies potentially malicious files,
and generates an inventory CSV for further analysis.
"""
import os
import sys
import csv
import mimetypes
import subprocess
import zipfile
from pathlib import Path
# Extensions of files that can run code or carry an executable payload.
EXECUTABLE_EXTENSIONS = {
    # Windows binaries and loadable modules
    '.exe', '.dll', '.sys', '.drv', '.ocx', '.scr', '.cpl',
    # Script hosts, installers and Java archives
    '.bat', '.cmd', '.com', '.ps1', '.vbs', '.vbe', '.js', '.jse',
    '.wsf', '.wsh', '.msi', '.msp', '.hta', '.jar',
    # Macro-enabled Office formats
    '.xlsm', '.xlam', '.xltm',
    '.docm', '.dotm',
    '.pptm', '.potm', '.ppam', '.ppsm',
    # Linux/Unix executables and shared objects
    '.sh', '.bin', '.run', '.elf', '.so',
    # Application bundles and software packages
    '.app', '.deb', '.rpm', '.dmg', '.pkg', '.apk',
}

# Extensions of configuration files, which could be abused.
CONFIG_EXTENSIONS = {
    '.ini', '.conf', '.config', '.cfg',
    '.xml', '.json', '.yaml', '.yml',
    '.reg', '.plist', '.toml', '.properties',
}

# Document extensions whose detected MIME type is checked for a mismatch.
DOCUMENT_EXTENSIONS = {
    '.pdf',
    '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
    '.odt', '.ods', '.odp',
    '.rtf',
}

# Archive extensions that are extracted so their contents can be scanned.
ARCHIVE_EXTENSIONS = {'.zip'}
def get_mime_by_extension(file_path):
    """Return the MIME type implied by the file name of *file_path*.

    Only the name is consulted (via ``mimetypes.guess_type``); the file's
    contents are never read. Returns the string "unknown" when no type
    can be guessed.
    """
    guessed = mimetypes.guess_type(file_path)[0]
    if guessed:
        return guessed
    return "unknown"
def get_actual_mime_type(file_path):
    """Return the MIME type that the external ``file`` utility reports.

    Runs ``file --mime-type -b -- <file_path>`` with a 5 second timeout and
    returns its standard output with surrounding whitespace stripped.

    Args:
        file_path: Path of the file to probe.

    Returns:
        The stripped output of ``file``, or a string of the form
        ``"error: <message>"`` when the command could not be run or timed
        out. This function never raises; callers receive a string either way.
    """
    try:
        result = subprocess.run(
            # "--" ends option parsing so that a path beginning with "-"
            # is treated as a file name rather than as command-line options.
            ['file', '--mime-type', '-b', '--', file_path],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception as e:
        # Deliberately broad: a single unreadable file (or a missing or slow
        # `file` binary) is reported in-band instead of aborting the caller.
        return f"error: {str(e)}"
    return result.stdout.strip()
def human_readable_size(size_bytes):
    """Format a byte count as a string with two decimals and a unit.

    The value is divided by 1024 until it drops below 1024, stepping
    through B, KB, MB, GB and TB. Anything still at or above 1024 TB is
    reported in PB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    remaining = size_bytes
    index = 0
    while index < len(units):
        if remaining < 1024.0:
            return f"{remaining:.2f} {units[index]}"
        remaining = remaining / 1024.0
        index += 1
    # Fell off the end of the unit table: express the rest in petabytes.
    return f"{remaining:.2f} PB"
def should_include_file(file_path, ext, mime_by_ext, actual_mime):
    """Decide whether a file belongs in the inventory.

    A file is included when any of the following holds:

    * its extension (case-insensitive) is in EXECUTABLE_EXTENSIONS or
      CONFIG_EXTENSIONS;
    * its extension is in DOCUMENT_EXTENSIONS but the detected MIME type
      contains 'application/zip', 'application/x-' or 'text/';
    * the detected MIME type names an executable, shared library or
      script, whatever the extension is.

    ``file_path`` and ``mime_by_ext`` are accepted but not consulted.
    Returns True to include the file, False otherwise.
    """
    suffix = ext.lower()

    # Executable and configuration extensions are always of interest.
    if suffix in EXECUTABLE_EXTENSIONS or suffix in CONFIG_EXTENSIONS:
        return True

    # A document whose content looks like an archive, a binary or plain
    # text is treated as a suspicious mismatch.
    if suffix in DOCUMENT_EXTENSIONS:
        looks_like_container = (
            'application/zip' in actual_mime
            or 'application/x-' in actual_mime
        )
        looks_like_text = (
            'text/' in actual_mime and suffix not in {'.txt', '.rtf'}
        )
        if looks_like_container or looks_like_text:
            return True

    detected = actual_mime.lower()
    flagged_types = (
        # Native executables and libraries
        'application/x-executable',
        'application/x-sharedlib',
        'application/x-mach-binary',
        'application/x-dosexec',
        'application/x-msdownload',
        # Scripts
        'text/x-shellscript',
        'text/x-python',
        'text/x-perl',
        'application/x-javascript',
    )
    return any(marker in detected for marker in flagged_types)
def extract_zip_in_place(zip_path):
    """Extract a ZIP archive into a folder next to it, named after it.

    The destination is *zip_path* with its final file-name extension
    removed (``/data/a.zip`` -> ``/data/a``). When the file name has no
    extension to remove, ``_extracted`` is appended instead, so the
    destination can never be the archive itself or a parent directory.

    Args:
        zip_path: Path of the archive to extract.

    Returns:
        The destination folder path on success, or None when the archive
        could not be opened or extracted (the reason is printed).
    """
    # splitext only looks at the last path component, so a dot in a parent
    # directory name (e.g. "dir.v1/archive") cannot be mistaken for the
    # archive's extension.
    stem, suffix = os.path.splitext(zip_path)
    extract_folder = stem if suffix else f"{zip_path}_extracted"

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Create the folder only once the archive has opened, so files
            # that are not ZIPs do not leave empty folders behind.
            os.makedirs(extract_folder, exist_ok=True)
            # NOTE(review): extractall applies no limit on extracted size;
            # consider a cap if archive bombs are a concern.
            zip_ref.extractall(extract_folder)
        print(f"[+] Extracted: {zip_path} -> {extract_folder}")
        return extract_folder
    except Exception as e:
        # Deliberately broad: a corrupt, encrypted or unreadable archive
        # must not abort the caller; it is reported and skipped.
        print(f"[!] Failed to extract {zip_path}: {e}")
        return None
def scan_folder(input_folder, output_folder):
    """Scan a folder tree and write an inventory of files of interest.

    Walks *input_folder* recursively. Every ``.zip`` archive found is
    extracted next to itself and the extracted folder is queued for
    scanning as well. Each remaining file is probed for its MIME type and
    added to the inventory when ``should_include_file`` accepts it.

    Side effects:
        * creates *output_folder* if needed and writes ``inventory.csv``
          there (UTF-8) with columns File, Type by Extension, Actual Type
          and Size;
        * creates extraction folders inside the scanned tree;
        * prints progress and a summary to standard output.

    Args:
        input_folder: Root folder to scan.
        output_folder: Folder that receives ``inventory.csv``.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    print(f"[*] Starting inventory scan of: {input_folder}")
    print(f"[*] Output folder: {output_folder}")

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)
    inventory_csv = os.path.join(output_folder, "inventory.csv")

    # CSV columns
    fieldnames = [
        'File',
        'Type by Extension',
        'Actual Type',
        'Size',
    ]

    # Paths already handled, so a folder reached twice is not listed twice.
    processed_files = set()
    inventory_rows = []

    # FIFO queue of folders to walk; extracted archives are appended to it.
    folders_to_scan = deque([input_folder])

    while folders_to_scan:
        current_folder = folders_to_scan.popleft()
        print(f"\n[*] Scanning: {current_folder}")

        for root, _dirs, files in os.walk(current_folder):
            for filename in files:
                file_path = os.path.join(root, filename)

                if file_path in processed_files:
                    continue
                processed_files.add(file_path)

                try:
                    file_size = os.path.getsize(file_path)
                    ext = os.path.splitext(filename)[1]

                    # Archives are replaced in the inventory by their
                    # contents, which are scanned from the queue.
                    if ext.lower() in ARCHIVE_EXTENSIONS:
                        print(f"[*] Found archive: {file_path}")
                        extracted_folder = extract_zip_in_place(file_path)
                        if extracted_folder:
                            folders_to_scan.append(extracted_folder)
                            continue
                        # Extraction failed: fall through and classify the
                        # file itself, so something merely named ".zip"
                        # (e.g. a renamed executable) is still evaluated.

                    mime_by_ext = get_mime_by_extension(file_path)
                    actual_mime = get_actual_mime_type(file_path)

                    if should_include_file(file_path, ext, mime_by_ext, actual_mime):
                        inventory_rows.append({
                            'File': file_path,
                            'Type by Extension': mime_by_ext,
                            'Actual Type': actual_mime,
                            'Size': human_readable_size(file_size),
                        })
                        print(f" [+] Added: {filename} ({actual_mime})")
                except Exception as e:
                    # Deliberately broad: one unreadable file is reported
                    # and skipped rather than ending the whole scan.
                    print(f" [!] Error processing {file_path}: {e}")

    # Explicit UTF-8 with backslashreplace so that a file name the locale
    # encoding cannot represent does not abort the write after the scan.
    with open(inventory_csv, 'w', newline='', encoding='utf-8',
              errors='backslashreplace') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(inventory_rows)

    # Summary
    print(f"\n{'='*60}")
    print("[+] INVENTORY COMPLETE")
    print(f"{'='*60}")
    print(f"Total files in inventory: {len(inventory_rows)}")
    print(f"Inventory saved to: {inventory_csv}")
    print(f"{'='*60}")
def main():
    """Command-line entry point.

    Expects exactly two arguments, the folder to scan and the folder for
    the results. Exits with status 1 when the arguments are wrong, the
    input folder does not exist, or the ``file`` command cannot be run;
    otherwise hands over to ``scan_folder``.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        usage_lines = (
            "Usage: make-inventory <input_folder> <output_folder>",
            "\nExample:",
            " make-inventory /path/to/scan /path/to/results",
        )
        for line in usage_lines:
            print(line)
        sys.exit(1)

    input_folder, output_folder = cli_args

    # The scan root must be an existing directory.
    if not os.path.isdir(input_folder):
        print(f"Error: Input folder does not exist: {input_folder}")
        sys.exit(1)

    # MIME detection shells out to `file`, so verify it runs before scanning.
    try:
        subprocess.run(['file', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        install_hint = (
            "Error: 'file' command not found. Please install it:",
            " Ubuntu/Debian: sudo apt-get install file",
            " Fedora/RHEL: sudo dnf install file",
        )
        for line in install_hint:
            print(line)
        sys.exit(1)

    scan_folder(input_folder, output_folder)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment