Skip to content

Instantly share code, notes, and snippets.

@kenny-kvibe
Created May 5, 2025 15:33
Show Gist options
  • Select an option

  • Save kenny-kvibe/83191ecbebdbc3a9b4c406a8fb7d6570 to your computer and use it in GitHub Desktop.

Select an option

Save kenny-kvibe/83191ecbebdbc3a9b4c406a8fb7d6570 to your computer and use it in GitHub Desktop.
Search & print different files from 2 or more directories based on sha256 checksum
import hashlib
import json
import os
# Directories to compare against each other. main() passes this list to
# find_diff_files(), which raises ValueError unless it holds at least 2 paths.
TARGET_DIRS_PATH: list[str] = []  # <-- Find in these directories
def walk_directory(dir_path: str, exclude_names: tuple[str, ...] | list[str] | None = None) -> list[str]:
file_paths = []
for file_name in os.listdir(dir_path):
file_path = os.path.join(dir_path, file_name)
if os.path.exists(file_path) and (not exclude_names or file_name not in exclude_names):
if os.path.isdir(file_path):
file_paths.extend(walk_directory(file_path, exclude_names))
else:
file_paths.append(file_path)
return file_paths
def file_checksum_sha256(file_path: str) -> str:
    """Return the upper-case hexadecimal SHA-256 digest of a file's contents.

    The file is read in 8 KiB chunks so it is never loaded into memory whole.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The digest as an upper-case hex string, or ``''`` when ``file_path``
        does not exist.
    """
    # Open first and handle the failure (EAFP): checking for existence and
    # then opening leaves a window in which the file can disappear.
    try:
        f = open(file_path, 'rb')
    except (FileNotFoundError, NotADirectoryError):
        return ''
    hasher = hashlib.sha256()
    with f:
        while chunk := f.read(8192):
            hasher.update(chunk)
    return hasher.hexdigest().upper()
def find_diff_files(dirs_paths_ls: tuple[str, ...] | list[str], exclude_names: tuple[str, ...] | list[str] | None = None) -> dict[str, list[str]]:
    """Find files that differ between two or more directory trees.

    Every file below each directory is hashed with SHA-256 and keyed by its
    path relative to that directory. A file is reported for a directory when
    at least one of the other directories lacks that relative path or holds
    different content under it. A summary is printed along the way.

    Args:
        dirs_paths_ls: Directories to compare; at least two are required.
        exclude_names: Entry names to skip while scanning (passed through to
            ``walk_directory``).

    Returns:
        A dict mapping each directory path to the list of its relative file
        paths that differ from, or are missing in, another directory. Each
        file appears at most once per directory.

    Raises:
        ValueError: If fewer than two directory paths are given.
    """
    if len(dirs_paths_ls) < 2:
        raise ValueError('Atleast 2 directory paths are needed to find the differences')

    # {directory: {relative file path: checksum}}. os.path.relpath yields the
    # same key for the same file no matter the platform's separator or
    # whether the directory path was given with a trailing separator.
    files = {
        dir_path: {
            os.path.relpath(file, dir_path): file_checksum_sha256(file)
            for file in walk_directory(dir_path, exclude_names)
        }
        for dir_path in dirs_paths_ls
    }

    # Compare the name sets, not their ordering: directory listing order is
    # arbitrary and must not influence the verdict.
    equal_names = all(
        files[dir_path].keys() == files[other_dir_path].keys()
        for dir_path in files
        for other_dir_path in files
        if dir_path != other_dir_path
    )
    print(f'Equal named files = {equal_names}')
    for dir_path in files:
        print(f'"{dir_path}" count = {len(files[dir_path])}')

    diffs = {dir_path: [] for dir_path in dirs_paths_ls}
    for dir_path, checksums in files.items():
        other_listings = [files[other] for other in files if other != dir_path]
        for file, checksum in checksums.items():
            # .get() returns None for a missing file, which never equals a
            # checksum string, so "missing" and "different" are both caught.
            # any() records the file once even if several directories mismatch.
            if any(listing.get(file) != checksum for listing in other_listings):
                diffs[dir_path].append(file)
    print(f'Diff files = {json.dumps(diffs, indent=2)}')
    return diffs
def main() -> int:
    """Compare the directories in ``TARGET_DIRS_PATH`` and print the result.

    Entries named ``__pycache__`` are left out of the comparison.

    Returns:
        ``0`` once the comparison has finished.
    """
    ignored_names = ['__pycache__']
    find_diff_files(TARGET_DIRS_PATH, ignored_names)
    print('Done.')
    return 0
# Run the comparison only when this file is executed as a script; the value
# returned by main() is handed to SystemExit as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment