Created
July 6, 2025 17:07
-
-
Save drindt/1487d5c87e50223e5eb3d6ebd60e2842 to your computer and use it in GitHub Desktop.
A tool for cleaning up verification-metadata.xml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| # ====================================================================================== | |
| # Gradle Sweeper - A tool for cleaning up verification-metadata.xml | |
| # https://gemini.google.com/app/98400daeaa43b060 | |
| # | |
| # Author: Gemini, based on user strategy | |
| # Version: 0.12.0 | |
| # | |
| # SCRIPT WORKFLOW & USAGE: | |
| # | |
| # This tool helps you clean up a bloated verification-metadata.xml file by identifying | |
| # exactly which artifacts and keys are used during your build. It operates in two | |
| # distinct phases. | |
| # | |
| # PREREQUISITE: | |
| # This script uses the 'inotify' Python library. Please install it first: | |
| # $ pip install inotify | |
| # (Note: inotify is Linux-specific) | |
| # | |
| # --- PHASE 1: RECORD --- | |
| # In this mode, the script monitors and records all files accessed by Gradle in its | |
| # dependency cache during a comprehensive build. | |
| # | |
| # 1. Start the recording process: | |
| # $ ./gradle_sweeper.py record | |
| # | |
| # 2. In a SECOND terminal, run a comprehensive build to ensure all configurations | |
| # are resolved. A good command for this is: | |
| # $ ./gradlew clean check assembleDebug --no-build-cache | |
| # | |
| # 3. Once the build is complete, return to the first terminal and press ENTER. | |
| # | |
| # 4. The script will save a whitelist of all used files to 'gradle/used_artifacts.txt'. | |
| # | |
| # --- PHASE 2: CLEAN --- | |
| # This mode uses the generated whitelist to remove all obsolete entries from your | |
| # verification metadata file. | |
| # | |
| # 1. Ensure 'gradle/used_artifacts.txt' from Phase 1 exists. | |
| # | |
| # 2. Run the clean process: | |
| # $ ./gradle_sweeper.py clean | |
| # | |
| # 3. The script will create a new, cleaned-up file named 'gradle/verification-metadata-cleaned.xml'. | |
| # | |
| # --- PHASE 3: VERIFY --- | |
| # 1. Manually review the changes between the original and the '-cleaned.xml' file. | |
| # 2. Replace the original file with the cleaned one. | |
| # 3. Run a final, clean build to ensure everything still works as expected. | |
| # | |
| # ====================================================================================== | |
| import os | |
| import re | |
| import argparse | |
| import threading | |
| import xml.etree.ElementTree as ET | |
| from xml.dom import minidom | |
| try: | |
| import inotify.adapters | |
| import inotify.constants | |
| except ImportError: | |
| print("ERROR: The 'inotify' library is required. Please install it using 'pip install inotify'") | |
| exit(1) | |
# --- Configuration ---
# All paths are relative to the project root, where the script is expected
# to be run from.
GRADLE_DIR = "gradle"
# Input of 'clean': Gradle's dependency-verification metadata file.
VERIFICATION_XML_PATH = os.path.join(GRADLE_DIR, "verification-metadata.xml")
# Output of 'clean': the pruned copy, left next to the original for review.
CLEANED_XML_PATH = os.path.join(GRADLE_DIR, "verification-metadata-cleaned.xml")
# Output of 'record' / input of 'clean': one accessed cache file path per line.
WHITELIST_FILE = os.path.join(GRADLE_DIR, "used_artifacts.txt")
# ANSI color codes for terminal output
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
RESET = "\033[0m"
# --- End of Configuration ---
def get_gradle_cache_path():
    """Return the default location of Gradle's module artifact cache.

    Gradle stores downloaded dependency files under
    ``~/.gradle/caches/modules-2/files-2.1`` in the user's home directory.
    """
    return os.path.join(
        os.path.expanduser("~"), ".gradle", "caches", "modules-2", "files-2.1"
    )
def record_file_access(output_file):
    """Record which Gradle cache files are accessed during a build.

    Watches the dependency cache tree with inotify while the user runs a
    comprehensive build in a second terminal, then writes the sorted set
    of accessed file paths (the whitelist) to *output_file*.

    Args:
        output_file: Path of the whitelist text file to write.
    """
    cache_path = get_gradle_cache_path()
    if not os.path.isdir(cache_path):
        print(f"{RED}ERROR: Gradle cache directory not found at '{cache_path}'.{RESET}")
        print("Please run a Gradle build at least once to create the cache.")
        return
    print(f"{GREEN}Starting to monitor Gradle cache: {cache_path}{RESET}")
    print("\n" + "=" * 60)
    print(f"{YELLOW}ACTION REQUIRED IN A SEPARATE TERMINAL:{RESET}")
    print("1. Navigate to your project's root directory.")
    print("2. Run a comprehensive, clean build. Recommended command:")
    print(f" {GREEN}./gradlew clean assembleDebug --no-build-cache{RESET}")
    print("3. Wait for the build to complete.")
    print("=" * 60)
    print(f"\n{YELLOW}Once the build is finished, press ENTER in THIS terminal to stop recording...{RESET}")
    i = inotify.adapters.InotifyTree(cache_path, mask=(inotify.constants.IN_OPEN | inotify.constants.IN_ACCESS))
    accessed_files = set()
    stop_event = threading.Event()

    def watcher_thread():
        try:
            for event in i.event_gen(yield_nones=False):
                if stop_event.is_set():
                    break
                (_, _, path, filename) = event
                if filename:
                    accessed_files.add(os.path.join(path, filename))
        except Exception as e:
            print(f"{RED}Error in watcher thread: {e}{RESET}")

    # FIX: run as a daemon thread. event_gen() only yields when a
    # filesystem event arrives, so the loop may never observe stop_event;
    # a non-daemon thread would then keep the interpreter alive forever
    # after join() below times out.
    thread = threading.Thread(target=watcher_thread, daemon=True)
    thread.start()
    input()  # Wait for the user to press ENTER after the build finishes.
    stop_event.set()
    # Touch a dummy file inside the watched tree to generate one final
    # inotify event, unblocking event_gen() so the watcher can exit.
    try:
        dummy_file = os.path.join(cache_path, "dummy_for_unblock")
        with open(dummy_file, "w") as f:
            f.write("dummy")
        os.remove(dummy_file)
    except OSError:
        pass
    thread.join(timeout=2)
    print(f"\nRecording stopped. Found {len(accessed_files)} unique accessed files.")
    # Ensure the output directory exists. Guard against a bare filename:
    # os.path.dirname would return "" and os.makedirs("") raises.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_file, "w") as f:
        for file_path in sorted(accessed_files):
            f.write(file_path + "\n")
    print(f"{GREEN}Whitelist of used artifacts saved to '{output_file}'.{RESET}")
def _rule_matches(rule, component):
    """Return True when one trusted-key scoping rule applies to *component*.

    *rule* is an XML element with optional 'group', 'name', 'version' and
    'regex' attributes; *component* is a (group, name, version) tuple.
    A missing attribute matches anything; only the group may be a regex,
    mirroring the original matching logic.
    """
    comp_group, comp_name, comp_version = component
    rule_group = rule.attrib.get("group")
    if rule_group:
        if rule.attrib.get("regex") == "true":
            try:
                if not re.match(rule_group, comp_group):
                    return False
            except re.error:
                # An unparsable pattern can never match.
                return False
        elif rule_group != comp_group:
            return False
    rule_name = rule.attrib.get("name")
    if rule_name and rule_name != comp_name:
        return False
    rule_version = rule.attrib.get("version")
    if rule_version and rule_version != comp_version:
        return False
    return True


def clean_metadata_file(whitelist_file, xml_path):
    """Clean the verification metadata XML based on a whitelist of used files.

    Removes every <component> whose cache path segment never appears in the
    whitelist produced by 'record', then drops <trusted-key> entries whose
    scoping rules no longer match any remaining component. The result is
    written to CLEANED_XML_PATH; the original file is left untouched.

    Args:
        whitelist_file: Text file with one accessed cache file path per line.
        xml_path: Path of the original verification-metadata.xml.
    """
    print("\n" + "=" * 60)
    print("=== Starting Metadata Cleanup ===")
    print("=" * 60)
    if not os.path.exists(whitelist_file):
        print(f"{RED}ERROR: Whitelist file '{whitelist_file}' not found.{RESET}")
        print("Please run the 'record' command first to generate it.")
        return
    if not os.path.exists(xml_path):
        print(f"{RED}ERROR: Verification metadata file not found at '{xml_path}'.{RESET}")
        return
    with open(whitelist_file, "r") as f:
        # FIX: skip blank lines (e.g. the trailing newline) so they are not
        # counted as recorded artifacts.
        used_files = {line.strip() for line in f if line.strip()}
    print(f"--- Analyzing '{xml_path}' against {len(used_files)} used files ---")
    with open(xml_path, "r", encoding="utf-8") as f:
        original_content = f.read()
    # Preserve everything before the root element (XML declaration, license
    # comments) verbatim; only the XML body is re-serialized.
    xml_start_index = original_content.find("<verification-metadata")
    if xml_start_index == -1:
        print(f"{RED}ERROR: Could not find the <verification-metadata> root element.{RESET}")
        return
    header = original_content[:xml_start_index]
    xml_body = original_content[xml_start_index:]
    namespace = "https://schema.gradle.org/dependency-verification"
    ET.register_namespace("", namespace)
    root = ET.fromstring(xml_body)
    components_node = root.find(f"{{{namespace}}}components")
    if components_node is None:
        print(f"{RED}ERROR: <components> tag not found.{RESET}")
        return
    removed_components_count = 0
    # First pass: remove unused components entirely.
    for component in list(components_node):
        group = component.attrib["group"]
        name = component.attrib["name"]
        version = component.attrib["version"]
        # A component is used if ANY of its files were accessed; its files
        # live under <cache>/<group-as-path>/<name>/<version>/ in the cache.
        component_path_segment = os.path.join(group.replace(".", "/"), name, version)
        is_used = any(component_path_segment in used_file for used_file in used_files)
        if not is_used:
            print(f" Removing unused component: {group}:{name}:{version}")
            components_node.remove(component)
            removed_components_count += 1
    # Second pass: keep only keys still required by the REMAINING components.
    print("\n--- Analyzing trusted keys ---")
    remaining_components = {
        (c.attrib["group"], c.attrib["name"], c.attrib["version"])
        for c in components_node
    }
    trusted_keys_node = root.find(f".//{{{namespace}}}trusted-keys")
    removed_keys_count = 0
    if trusted_keys_node is not None:
        for key_node in list(trusted_keys_node):
            key_id = key_node.attrib.get("id")
            if not key_id:
                continue
            # A key's scope is either attributes on the <trusted-key> element
            # itself or nested <trusting> children. FIX: only treat the key
            # element as a rule when it actually carries scoping attributes —
            # previously an attribute-less parent was always included as a
            # rule, matched every component, and keys scoped solely via
            # <trusting> children could never be removed.
            rules = key_node.findall(f"{{{namespace}}}trusting")
            if not rules or any(a in key_node.attrib for a in ("group", "name", "version")):
                rules.insert(0, key_node)
            is_required = False
            for rule in rules:
                for comp in remaining_components:
                    if _rule_matches(rule, comp):
                        print(f" Keeping key {key_id[:16]}... because its rule matches {comp[0]}:{comp[1]}")
                        is_required = True
                        break
                if is_required:
                    break
            if not is_required:
                print(f" {YELLOW}Removing unused trusted key: {key_id}{RESET}")
                trusted_keys_node.remove(key_node)
                removed_keys_count += 1
    print("\n--- Cleanup Summary ---")
    print(f"Removed {removed_components_count} unused component entries.")
    print(f"Removed {removed_keys_count} unused trusted key entries.")
    # Re-serialize and pretty-print. FIX: toprettyxml() was previously called
    # twice, with the declaration offset computed on the *default-formatted*
    # string and applied to the indented one — compute once on one string.
    xml_str = ET.tostring(root, encoding="unicode")
    pretty_xml = minidom.parseString(xml_str).toprettyxml(indent="    ", newl="\n")
    decl_end = pretty_xml.find("?>")
    if decl_end != -1:
        # Drop minidom's own XML declaration; the original header (written
        # below) already carries one.
        pretty_xml = pretty_xml[decl_end + 2:]
    # toprettyxml leaves blank lines where the source had whitespace text
    # nodes; strip them for a compact diff-friendly file.
    cleaned_xml_str = "\n".join(
        line for line in pretty_xml.strip().split("\n") if line.strip()
    )
    with open(CLEANED_XML_PATH, "w", encoding="utf-8") as f:
        f.write(header)
        f.write(cleaned_xml_str)
    print(f"\n{GREEN}Successfully created cleaned file: '{CLEANED_XML_PATH}'{RESET}")
    print("Please review the changes and replace the original file if you are satisfied.")
def main():
    """Entry point: parse the command line and run the selected mode."""
    parser = argparse.ArgumentParser(
        description="A tool to record Gradle dependency usage and clean the verification metadata file.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # 'record': phase 1 — watch the Gradle cache and build the whitelist.
    record_cmd = subparsers.add_parser(
        "record", help="Records dependency file access during a Gradle build."
    )
    record_cmd.set_defaults(func=lambda args: record_file_access(WHITELIST_FILE))

    # 'clean': phase 2 — prune the metadata file using that whitelist.
    clean_cmd = subparsers.add_parser(
        "clean", help="Cleans the metadata file using a previously recorded whitelist."
    )
    clean_cmd.set_defaults(
        func=lambda args: clean_metadata_file(WHITELIST_FILE, VERIFICATION_XML_PATH)
    )

    parsed_args = parser.parse_args()

    print("=================================================")
    print(f"=== {GREEN}Gradle Sweeper{RESET} ===")
    print("=================================================")
    parsed_args.func(parsed_args)
    print("\n=================================================")
    print("=== Tool execution finished ===")
    print("=================================================")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment