Skip to content

Instantly share code, notes, and snippets.

@drindt
Created July 6, 2025 17:07
Show Gist options
  • Select an option

  • Save drindt/1487d5c87e50223e5eb3d6ebd60e2842 to your computer and use it in GitHub Desktop.

Select an option

Save drindt/1487d5c87e50223e5eb3d6ebd60e2842 to your computer and use it in GitHub Desktop.
A tool for cleaning up verification-metadata.xml
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ======================================================================================
# Gradle Sweeper - A tool for cleaning up verification-metadata.xml
# https://gemini.google.com/app/98400daeaa43b060
#
# Author: Gemini, based on user strategy
# Version: 0.12.0
#
# SCRIPT WORKFLOW & USAGE:
#
# This tool helps you clean up a bloated verification-metadata.xml file by identifying
# exactly which artifacts and keys are used during your build. It operates in two
# automated phases (record, clean), followed by a manual verification phase.
#
# PREREQUISITE:
# This script uses the 'inotify' Python library. Please install it first:
# $ pip install inotify
# (Note: inotify is Linux-specific)
#
# --- PHASE 1: RECORD ---
# In this mode, the script monitors and records all files accessed by Gradle in its
# dependency cache during a comprehensive build.
#
# 1. Start the recording process:
# $ ./gradle_sweeper.py record
#
# 2. In a SECOND terminal, run a comprehensive build to ensure all configurations
# are resolved. A good command for this is:
# $ ./gradlew clean check assembleDebug --no-build-cache
#
# 3. Once the build is complete, return to the first terminal and press ENTER.
#
# 4. The script will save a whitelist of all used files to 'gradle/used_artifacts.txt'.
#
# --- PHASE 2: CLEAN ---
# This mode uses the generated whitelist to remove all obsolete entries from your
# verification metadata file.
#
# 1. Ensure 'gradle/used_artifacts.txt' from Phase 1 exists.
#
# 2. Run the clean process:
# $ ./gradle_sweeper.py clean
#
# 3. The script will create a new, cleaned-up file named 'gradle/verification-metadata-cleaned.xml'.
#
# --- PHASE 3: VERIFY ---
# 1. Manually review the changes between the original and the '-cleaned.xml' file.
# 2. Replace the original file with the cleaned one.
# 3. Run a final, clean build to ensure everything still works as expected.
#
# ======================================================================================
import os
import re
import argparse
import threading
import xml.etree.ElementTree as ET
from xml.dom import minidom
try:
    import inotify.adapters
    import inotify.constants
except ImportError:
    # Raising SystemExit with a string prints the message to stderr and exits
    # with status 1. Unlike the interactive `exit()` helper it does not depend
    # on the `site` module having been loaded.
    raise SystemExit(
        "ERROR: The 'inotify' library is required. Please install it using 'pip install inotify'"
    ) from None
# --- Configuration ---
# Directory (relative to the current working directory) that holds the
# verification metadata and the files this tool generates.
GRADLE_DIR = "gradle"

# Input metadata file, cleaned output file, and the recorded whitelist, all
# located inside GRADLE_DIR.
VERIFICATION_XML_PATH, CLEANED_XML_PATH, WHITELIST_FILE = (
    os.path.join(GRADLE_DIR, file_name)
    for file_name in (
        "verification-metadata.xml",
        "verification-metadata-cleaned.xml",
        "used_artifacts.txt",
    )
)

# ANSI color codes for terminal output
RED, GREEN, YELLOW, RESET = "\033[91m", "\033[92m", "\033[93m", "\033[0m"
# --- End of Configuration ---
def get_gradle_cache_path():
    """Return the path of the Gradle dependency cache directory.

    The Gradle user home is read from the ``GRADLE_USER_HOME`` environment
    variable when it is set to a non-empty value; otherwise ``~/.gradle`` is
    used. The path is only computed here; it is not checked for existence.

    Returns:
        str: ``<gradle user home>/caches/modules-2/files-2.1``.
    """
    gradle_user_home = os.environ.get("GRADLE_USER_HOME") or os.path.join(
        os.path.expanduser("~"), ".gradle"
    )
    return os.path.join(gradle_user_home, "caches", "modules-2", "files-2.1")
def record_file_access(output_file):
    """Record which entries of the Gradle cache are opened or read during a build.

    Registers inotify watches (``IN_OPEN | IN_ACCESS``) on the directory
    returned by ``get_gradle_cache_path()``, collects the full path of every
    reported entry in a background thread until the user presses ENTER, and
    then writes the sorted, de-duplicated paths to ``output_file`` (one per
    line, UTF-8).

    Args:
        output_file: Path of the whitelist file to write. Its parent directory
            is created if it does not exist.

    Returns:
        None. If the cache directory does not exist an error is printed and
        nothing is recorded.
    """
    cache_path = get_gradle_cache_path()
    if not os.path.isdir(cache_path):
        print(f"{RED}ERROR: Gradle cache directory not found at '{cache_path}'.{RESET}")
        print("Please run a Gradle build at least once to create the cache.")
        return

    print(f"{GREEN}Starting to monitor Gradle cache: {cache_path}{RESET}")
    # Register the watches BEFORE asking the user to start the build, so that
    # no file access is missed while the watches are still being set up.
    watcher = inotify.adapters.InotifyTree(
        cache_path, mask=(inotify.constants.IN_OPEN | inotify.constants.IN_ACCESS)
    )

    print("\n" + "=" * 60)
    print(f"{YELLOW}ACTION REQUIRED IN A SEPARATE TERMINAL:{RESET}")
    print("1. Navigate to your project's root directory.")
    print("2. Run a comprehensive, clean build. Recommended command:")
    # Includes 'check', matching the command recommended in the file header.
    print(f" {GREEN}./gradlew clean check assembleDebug --no-build-cache{RESET}")
    print("3. Wait for the build to complete.")
    print("=" * 60)
    print(f"\n{YELLOW}Once the build is finished, press ENTER in THIS terminal to stop recording...{RESET}")

    accessed_files = set()
    stop_event = threading.Event()

    def watcher_thread():
        try:
            # yield_nones is left at its default so the generator also yields
            # None when a poll interval passes without events. That lets the
            # loop re-check stop_event on an idle cache, with no need to
            # create a dummy file inside the cache to wake the thread up.
            for event in watcher.event_gen():
                if stop_event.is_set():
                    break
                if event is None:
                    continue
                (_, _, path, filename) = event
                if filename:
                    accessed_files.add(os.path.join(path, filename))
        except Exception as e:
            print(f"{RED}Error in watcher thread: {e}{RESET}")

    # Daemon thread: a watcher that fails to stop must never keep the process
    # alive after the results have been written.
    thread = threading.Thread(target=watcher_thread, daemon=True)
    thread.start()
    try:
        input()  # Wait for user to press Enter
    finally:
        # Also stop the watcher when input() is interrupted (e.g. Ctrl+C).
        stop_event.set()
    thread.join(timeout=2)

    recorded_paths = sorted(accessed_files)
    print(f"\nRecording stopped. Found {len(recorded_paths)} unique accessed files.")

    # Ensure the output directory exists. dirname is empty for a bare file
    # name, and os.makedirs("") would raise.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(f"{file_path}\n" for file_path in recorded_paths)
    print(f"{GREEN}Whitelist of used artifacts saved to '{output_file}'.{RESET}")
def _component_is_used(group, name, version, normalized_paths):
    """Return True if a recorded path lies in the component's cache directory."""
    # Gradle's files-2.1 cache keeps the dotted group as ONE directory
    # (<group>/<name>/<version>/<hash>/<file>). The slash-separated form is
    # also accepted so whitelists using a Maven-repository layout still match.
    # The surrounding "/" prevents version "1.0" from matching "1.0.1".
    needles = {
        f"/{group}/{name}/{version}/",
        f"/{group.replace('.', '/')}/{name}/{version}/",
    }
    return any(needle in path for path in normalized_paths for needle in needles)


def _coordinates_from_cache_paths(used_files):
    """Extract (group, name, version) triples from paths below a 'files-2.1' directory."""
    coordinates = set()
    for path in used_files:
        parts = path.split("/")
        if "files-2.1" not in parts:
            continue
        start = parts.index("files-2.1") + 1
        triple = tuple(parts[start:start + 3])
        if len(triple) == 3 and all(triple):
            coordinates.add(triple)
    return coordinates


def _coordinate_matches(expected, actual, is_regex):
    """Return True if one rule attribute accepts the given coordinate value."""
    if not expected:
        return True  # attribute absent: the rule does not restrict this part
    if not is_regex:
        return expected == actual
    try:
        # Full match: the pattern has to cover the whole value.
        return re.fullmatch(expected, actual) is not None
    except re.error:
        # The pattern cannot be evaluated here; keep the key rather than
        # risk deleting one the build still needs.
        return True


def _find_trusted_coordinate(key_node, namespace, coordinates):
    """Return the first coordinate trusted by any rule of the key, or None."""
    rules = list(key_node.findall(f"{{{namespace}}}trusting"))
    has_own_rule = any(key_node.attrib.get(attr) for attr in ("group", "name", "version", "file"))
    # The key element itself only counts as a rule when it carries coordinate
    # attributes or has no nested <trusting> rules. Otherwise its empty
    # attribute set would match every component and the key would never be
    # removed.
    if has_own_rule or not rules:
        rules.insert(0, key_node)
    for rule in rules:
        is_regex = rule.attrib.get("regex") == "true"
        expected = (rule.attrib.get("group"), rule.attrib.get("name"), rule.attrib.get("version"))
        for coordinate in coordinates:
            if all(_coordinate_matches(exp, act, is_regex) for exp, act in zip(expected, coordinate)):
                return coordinate
    return None


def clean_metadata_file(whitelist_file, xml_path):
    """Cleans the verification metadata XML based on a whitelist of used files.

    Components whose cache directory does not appear in the whitelist are
    removed from ``<components>``. Trusted keys are removed when none of their
    rules matches a coordinate that is still needed, i.e. a remaining component
    or a coordinate derived from a whitelisted path below ``files-2.1``. The
    result is written to ``CLEANED_XML_PATH``; ``xml_path`` is never modified.

    Args:
        whitelist_file: Text file with one accessed path per line.
        xml_path: Path of the verification metadata XML to analyze.

    Returns:
        None. Problems (missing files, missing root or ``<components>``
        element, unparsable XML) are reported on stdout and abort the cleanup.
    """
    print("\n" + "=" * 60)
    print("=== Starting Metadata Cleanup ===")
    print("=" * 60)
    if not os.path.exists(whitelist_file):
        print(f"{RED}ERROR: Whitelist file '{whitelist_file}' not found.{RESET}")
        print("Please run the 'record' command first to generate it.")
        return
    if not os.path.exists(xml_path):
        print(f"{RED}ERROR: Verification metadata file not found at '{xml_path}'.{RESET}")
        return

    with open(whitelist_file, "r", encoding="utf-8") as f:
        used_files = {line.strip() for line in f if line.strip()}
    print(f"--- Analyzing '{xml_path}' against {len(used_files)} used files ---")

    with open(xml_path, "r", encoding="utf-8") as f:
        original_content = f.read()
    xml_start_index = original_content.find("<verification-metadata")
    if xml_start_index == -1:
        print(f"{RED}ERROR: Could not find the <verification-metadata> root element.{RESET}")
        return
    # Everything before the root element (XML declaration, leading comments)
    # is copied to the output verbatim.
    header = original_content[:xml_start_index]
    xml_body = original_content[xml_start_index:]

    namespace = "https://schema.gradle.org/dependency-verification"
    ET.register_namespace("", namespace)
    try:
        root = ET.fromstring(xml_body)
    except ET.ParseError as e:
        print(f"{RED}ERROR: Could not parse '{xml_path}': {e}{RESET}")
        return
    components_node = root.find(f"{{{namespace}}}components")
    if components_node is None:
        print(f"{RED}ERROR: <components> tag not found.{RESET}")
        return

    # First pass: remove unused components entirely.
    # Each path is wrapped in "/" so that every directory name, including the
    # last one, can be matched as a whole path segment.
    normalized_paths = [f"/{path.strip('/')}/" for path in used_files]
    removed_components_count = 0
    for component in list(components_node):
        group = component.attrib.get("group")
        name = component.attrib.get("name")
        version = component.attrib.get("version")
        if group is None or name is None or version is None:
            continue  # cannot judge an entry without full coordinates; keep it
        # A component is used if ANY of its files were accessed.
        if not _component_is_used(group, name, version, normalized_paths):
            print(f" Removing unused component: {group}:{name}:{version}")
            components_node.remove(component)
            removed_components_count += 1

    # Second pass: identify all keys that are still required.
    print("\n--- Analyzing trusted keys ---")
    # Coordinates taken from the whitelist are included as well, so that a key
    # stays when the artifact it covers was used but has no <component> entry.
    required_coordinates = _coordinates_from_cache_paths(used_files)
    for component in components_node:
        coordinate = tuple(component.attrib.get(attr) for attr in ("group", "name", "version"))
        if None not in coordinate:
            required_coordinates.add(coordinate)
    required_coordinates = sorted(required_coordinates)  # stable output order

    trusted_keys_node = root.find(f".//{{{namespace}}}trusted-keys")
    removed_keys_count = 0
    if trusted_keys_node is not None:
        for key_node in list(trusted_keys_node):
            key_id = key_node.attrib.get("id")
            if not key_id:
                continue
            match = _find_trusted_coordinate(key_node, namespace, required_coordinates)
            if match is not None:
                print(f" Keeping key {key_id[:16]}... because its rule matches {match[0]}:{match[1]}")
            else:
                print(f" {YELLOW}Removing unused trusted key: {key_id}{RESET}")
                trusted_keys_node.remove(key_node)
                removed_keys_count += 1

    print("\n--- Cleanup Summary ---")
    print(f"Removed {removed_components_count} unused component entries.")
    print(f"Removed {removed_keys_count} unused trusted key entries.")

    # Write the cleaned XML
    xml_str = ET.tostring(root, encoding="unicode")
    pretty_xml = minidom.parseString(xml_str).toprettyxml(indent=" ", newl="\n")
    # Drop the XML declaration minidom adds; the original one is in `header`.
    pretty_xml_str = pretty_xml[pretty_xml.find("?>") + 2:].strip()
    # Re-indenting already indented XML produces blank lines; remove them.
    cleaned_xml_str = "\n".join(line for line in pretty_xml_str.split("\n") if line.strip())
    with open(CLEANED_XML_PATH, "w", encoding="utf-8") as f:
        f.write(header)
        f.write(cleaned_xml_str)
    print(f"\n{GREEN}Successfully created cleaned file: '{CLEANED_XML_PATH}'{RESET}")
    print("Please review the changes and replace the original file if you are satisfied.")
def main():
    """Parse the requested sub-command and run it between banner lines.

    Two sub-commands are registered: ``record`` calls ``record_file_access``
    with the whitelist path, and ``clean`` calls ``clean_metadata_file`` with
    the whitelist and the verification metadata path. A sub-command is
    mandatory.
    """
    cli = argparse.ArgumentParser(
        description="A tool to record Gradle dependency usage and clean the verification metadata file.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    commands = cli.add_subparsers(dest="command", required=True)

    def run_record(args):
        return record_file_access(WHITELIST_FILE)

    def run_clean(args):
        return clean_metadata_file(WHITELIST_FILE, VERIFICATION_XML_PATH)

    # (sub-command name, help text, handler), registered in this order.
    command_table = (
        ("record", "Records dependency file access during a Gradle build.", run_record),
        ("clean", "Cleans the metadata file using a previously recorded whitelist.", run_clean),
    )
    for command_name, help_text, handler in command_table:
        commands.add_parser(command_name, help=help_text).set_defaults(func=handler)

    options = cli.parse_args()

    rule = "================================================="
    print(rule)
    print(f"=== {GREEN}Gradle Sweeper{RESET} ===")
    print(rule)
    options.func(options)
    print("\n" + rule)
    print("=== Tool execution finished ===")
    print(rule)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment