Created
November 13, 2025 12:48
-
-
Save armamini/fbd4d3390dae3a379e493f284a41124c to your computer and use it in GitHub Desktop.
ULP deduplicate tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import re | |
| from collections import OrderedDict | |
def extract_credentials(line):
    """Pull the first ``email:password`` pair out of one line of text.

    The line is stripped of surrounding whitespace and then searched (the
    pattern is not anchored at the start) for an e-mail address immediately
    followed by a colon. Everything after that colon, up to the end of the
    stripped line, is taken as the password, so the password itself may
    contain further colons.

    Args:
        line: A single raw line, with or without a trailing newline.

    Returns:
        An ``(email, password)`` tuple, or ``None`` when the line holds no
        such pair (including when nothing follows the colon).
    """
    email_then_password = r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}):(.+)$'
    found = re.search(email_then_password, line.strip())
    if found is None:
        return None
    # The pattern defines exactly two groups, so groups() is (email, password).
    return found.groups()
def deduplicate_credentials(input_file, output_file):
    """Copy the credential lines of *input_file* to *output_file* without repeats.

    Every non-blank line is passed to ``extract_credentials``; lines for
    which it returns ``None`` are dropped. Two lines count as duplicates when
    their ``(email, password)`` tuples are equal, regardless of any other
    text on the line. A duplicate keeps the position of its first occurrence
    in the output, but the text written is that of its last occurrence
    (stripped of surrounding whitespace).

    Both files are handled as UTF-8 with the ``surrogateescape`` error
    handler, so a stray undecodable byte no longer aborts the whole run and
    is written back unchanged.

    A summary is printed to stdout on success. Failures are reported with a
    printed ``Error: ...`` message rather than raised.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the file to create or overwrite.

    Returns:
        None.
    """
    unique_creds = OrderedDict()
    total_lines = 0
    valid_lines = 0

    try:
        # surrogateescape maps undecodable bytes to lone surrogates instead of
        # raising UnicodeDecodeError; using it again on write restores them.
        with open(input_file, 'r', encoding='utf-8',
                  errors='surrogateescape') as f:
            for line in f:
                total_lines += 1
                stripped = line.strip()
                if not stripped:
                    continue  # Skip empty lines
                creds = extract_credentials(stripped)
                if creds is None:
                    continue
                valid_lines += 1
                # Assigning to an existing key keeps its original position in
                # the OrderedDict and only replaces the stored line text.
                unique_creds[creds] = stripped

        with open(output_file, 'w', encoding='utf-8',
                  errors='surrogateescape') as f:
            f.writelines(kept + '\n' for kept in unique_creds.values())

        duplicates = valid_lines - len(unique_creds)

        print("Processing complete!")
        print(f"Total lines processed: {total_lines}")
        print(f"Valid credential lines: {valid_lines}")
        print(f"Unique credentials: {len(unique_creds)}")
        print(f"Duplicates removed: {duplicates}")
        print(f"\nOutput written to: {output_file}")
    except FileNotFoundError:
        print(f"Error: File '{input_file}' not found.")
    except Exception as e:
        # Deliberate catch-all: this is the script's reporting boundary.
        print(f"Error: {e}")
def main():
    """Command-line entry point: deduplicate the file named by ``sys.argv[1]``.

    Prints a banner, then either a usage message (exiting with status 1 when
    no input path was given) or runs ``deduplicate_credentials``. The output
    path is the input path with ``_dedup.txt`` in place of a trailing
    ``.txt``, or with ``_dedup.txt`` appended when there is no such suffix.
    """
    import sys

    print("Email:Password Deduplicator")
    print("=" * 50)

    if len(sys.argv) < 2:
        print("Usage: python3 script.py <input_file>")
        print("Example: python3 script.py credentials.txt")
        sys.exit(1)

    input_file = sys.argv[1]

    suffix = '.txt'
    if input_file.endswith(suffix):
        # Trim only the trailing extension. str.replace would rewrite every
        # ".txt" in the path (e.g. a directory named "dump.txt.d").
        stem = input_file[:-len(suffix)]
    else:
        stem = input_file
    output_file = stem + '_dedup.txt'

    deduplicate_credentials(input_file, output_file)
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment