Created
December 5, 2025 00:27
-
-
Save exaland/67cb16346b316deda66d83b41a4ef0b9 to your computer and use it in GitHub Desktop.
Simple extractor to find hardcoded strings in Android XML files and move them to res/values/strings.xml.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Simple extractor to find hardcoded strings in Android XML files and move them to res/values/strings.xml. | |
| - Scans app/src/main/res for layout/menu/xml/etc (skips values/) | |
| - Targets attributes: android:text, android:hint, android:contentDescription, android:title, android:label, android:summary | |
| - Skips values already referencing @, ? or data-binding @{...} | |
| - Adds entries to app/src/main/res/values/strings.xml and replaces literals with @string/<key> | |
| - Creates locale placeholder values-<lang>/strings.xml for existing locales or a default set | |
| Usage: | |
| tools/extract_strings.py [--dry-run] | |
| Note: This is a best-effort tool. Review changes before committing. | |
| """ | |
| import os | |
| import re | |
| import sys | |
| import argparse | |
| import hashlib | |
| import datetime | |
| from collections import defaultdict | |
| from xml.etree import ElementTree as ET | |
# Repository root: the parent of the directory that contains this script.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Android resource tree that is scanned for hardcoded strings.
APP_RES = os.path.join(ROOT, 'app', 'src', 'main', 'res')

# android:* attributes whose literal values are candidates for extraction.
TARGET_ATTRS = ['text', 'hint', 'contentDescription', 'title', 'label', 'summary']

# Matches android:<attr>="<value>" for the attributes above.
# Group 1 is the attribute name, group 2 the raw (still XML-escaped) value.
ATTR_RE = re.compile('android:(' + '|'.join(TARGET_ATTRS) + ')="([^"]*)"')

# Values starting with one of these prefixes are resource references,
# theme attributes or data-binding expressions and are left untouched.
SKIP_PREFIX = ('@', '?', '@{')
def load_existing_keys(strings_xml_path):
    """Return a ``{name: text}`` mapping of the <string> entries in a file.

    A missing file yields an empty dict. The file is parsed with
    ElementTree first, in which case the text of each entry is the
    concatenation of all text nodes inside the element. If the file is not
    well-formed XML, a regex fallback is used instead and the text is the
    raw markup between the opening and closing <string> tags.
    """
    if not os.path.exists(strings_xml_path):
        return {}

    try:
        resources = ET.parse(strings_xml_path).getroot()
    except ET.ParseError:
        # Fallback: naive regex scan of the raw file content.
        with open(strings_xml_path, 'r', encoding='utf-8') as handle:
            raw = handle.read()
        entry_pattern = r'<string\s+name="([^"]+)">(.+?)</string>'
        return {
            found.group(1): found.group(2)
            for found in re.finditer(entry_pattern, raw, re.S)
        }

    # Only direct <string> children of the root element are considered.
    return {
        node.get('name'): ''.join(node.itertext())
        for node in resources.findall('string')
    }
def ensure_strings_xml(path):
    """Create an empty strings.xml skeleton at ``path`` unless it exists.

    Missing parent directories are created. An existing file is never
    modified.

    Args:
        path: Location of the strings.xml file; may be a bare filename.
    """
    if os.path.exists(path):
        return
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write('<?xml version="1.0" encoding="utf-8"?>\n<resources>\n</resources>\n')
def make_key(filename, attr, text, existing_keys):
    """Build a string-resource key that is not yet in ``existing_keys``.

    The key has the form ``str_<file>_<attr>_<hash>``: ``<file>`` is the
    lower-cased filename with every run of non-alphanumeric characters
    collapsed to ``_``, and ``<hash>`` is the first 8 hex digits of the
    SHA-1 of the stripped text. If that key is already taken, ``_1``,
    ``_2``, ... is appended until a free key is found.
    """
    slug = re.sub(r'[^0-9a-zA-Z]+', '_', filename).strip('_').lower()
    # A short content hash keeps keys for different texts apart.
    digest = hashlib.sha1(text.strip().encode('utf-8')).hexdigest()[:8]
    stem = f"str_{slug}_{attr}_{digest}"

    candidate = stem
    suffix = 0
    while candidate in existing_keys:
        suffix += 1
        candidate = f"{stem}_{suffix}"
    return candidate
# Directory names whose only qualifier is a locale: either the legacy form
# values-<lang>[-r<REGION>] or the BCP 47 form values-b+<lang>[+<subtag>...].
_LOCALE_VALUES_DIR_RE = re.compile(
    r'values-(?:[a-z]{2,3}(?:-r[A-Z0-9]{2,3})?|b\+[a-z]{2,3}(?:\+[A-Za-z0-9]+)*)'
)

# Short alphabetic qualifiers that look like a language code but are not one
# ("car" is Android's UI-mode qualifier).
_NON_LOCALE_QUALIFIERS = frozenset({'car'})


def find_locale_dirs(res_dir):
    """Return the sorted names of the locale-specific values directories.

    Only directories named after a locale qualifier are returned, such as
    ``values-fr``, ``values-pt-rBR`` or ``values-b+sr+Latn``. Other
    ``values-*`` variants (``values-night``, ``values-v21``,
    ``values-sw600dp``, ...) are configuration qualifiers rather than
    translations and are ignored, so they never receive placeholder strings.

    Args:
        res_dir: Path of the Android ``res`` directory to inspect.

    Returns:
        A sorted list of directory names (not full paths).
    """
    prefix_len = len('values-')
    found = []
    for name in os.listdir(res_dir):
        if not _LOCALE_VALUES_DIR_RE.fullmatch(name):
            continue
        if name[prefix_len:] in _NON_LOCALE_QUALIFIERS:
            continue
        if os.path.isdir(os.path.join(res_dir, name)):
            found.append(name)
    return sorted(found)
# An ampersand that does not already start an XML entity or character
# reference; extracted values come from raw XML and may be pre-escaped.
_BARE_AMPERSAND_RE = re.compile(
    r'&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)'
)

# Either an existing backslash escape (kept as is) or a quote character,
# literal or as an entity, that still needs a backslash in front of it.
_QUOTE_OR_ESCAPE_RE = re.compile(r'\\.|&apos;|&quot;|[\'"]', re.S)


def _escape_string_value(value):
    """Return ``value`` escaped for use as the body of a <string> element.

    Bare ``&``, ``<`` and ``>`` become XML entities, and apostrophes and
    double quotes get the backslash that Android string resources require.
    Existing entities and backslash escapes are left alone, so applying the
    function twice gives the same result as applying it once.
    """
    escaped = _BARE_AMPERSAND_RE.sub('&amp;', value)
    escaped = escaped.replace('<', '&lt;').replace('>', '&gt;')

    def _protect(match):
        token = match.group(0)
        return token if token.startswith('\\') else '\\' + token

    return _QUOTE_OR_ESCAPE_RE.sub(_protect, escaped)


def _insert_before_resources_end(data, additions):
    """Insert ``additions`` before the last </resources>; None if absent."""
    pos = data.rfind('</resources>')
    if pos == -1:
        return None
    return data[:pos] + additions + data[pos:]


def _is_extractable(val):
    """Tell whether an attribute value is a literal worth extracting."""
    if not val or val.startswith(SKIP_PREFIX):
        return False
    # Skip data-binding or other brace expressions.
    if '@{' in val or ('}' in val and val.strip().startswith('{')):
        return False
    # Skip values made up only of digits, whitespace and punctuation.
    if re.fullmatch(r'[\d\s\W]+', val):
        return False
    return True


def create_locale_placeholders(res_dir, new_entries, locales):
    """Add placeholder entries for new strings to each locale's strings.xml.

    For every directory name in ``locales`` the file
    ``<res_dir>/<locale>/strings.xml`` is created if missing. Entries from
    ``new_entries`` whose key is not yet in that file are then inserted
    before the closing </resources> tag, below a TODO comment. Files that
    need no new entry, or that have no closing </resources> tag, are left
    unchanged.

    Args:
        res_dir: Path of the Android ``res`` directory.
        new_entries: Mapping of string key to the (untranslated) text.
        locales: Iterable of directory names such as ``values-fr``.

    Returns:
        The list of strings.xml paths that were rewritten.
    """
    created = []
    for loc in locales:
        path = os.path.join(res_dir, loc, 'strings.xml')
        ensure_strings_xml(path)
        if not new_entries:
            continue
        keys = load_existing_keys(path)
        lines = [
            f' <string name="{k}">{_escape_string_value(v)}</string>\n'
            for k, v in new_entries.items()
            if k not in keys
        ]
        # Nothing missing: do not rewrite the file just to add a comment.
        if not lines:
            continue
        with open(path, 'r', encoding='utf-8') as f:
            data = f.read()
        additions = '\n <!-- TODO: auto-generated placeholders -->\n' + ''.join(lines)
        newdata = _insert_before_resources_end(data, additions)
        if newdata is None:
            continue
        with open(path, 'w', encoding='utf-8') as f:
            f.write(newdata)
        created.append(path)
    return created


def main():
    """Scan the resource tree, report hardcoded strings and extract them.

    The planned changes are always printed. With ``--dry-run`` nothing is
    written to disk. Otherwise the XML files are rewritten (after a backup
    copy is stored under ``backup/extracted/<timestamp>``), the new entries
    are appended to ``values/strings.xml``, placeholders are added to the
    locale files and a report is written to the ``tools`` directory.

    ``--apply`` is accepted for compatibility; applying is already the
    default whenever ``--dry-run`` is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dry-run', action='store_true', help='Show planned changes')
    parser.add_argument('--apply', action='store_true', help='Apply changes')
    args = parser.parse_args()

    if not os.path.isdir(APP_RES):
        print('ERROR: res directory not found at', APP_RES)
        sys.exit(1)

    strings_xml = os.path.join(APP_RES, 'values', 'strings.xml')
    # load_existing_keys copes with a missing file, so the file is only
    # created later, when changes are really applied (a dry run must not
    # touch the disk).
    existing_keys = load_existing_keys(strings_xml)

    # Collect the files to scan, in a stable order.
    xml_files = []
    for root, dirs, files in os.walk(APP_RES):
        dirs.sort()
        # Skip values folders.
        if os.path.basename(root).startswith('values'):
            continue
        for fn in sorted(files):
            if fn.endswith('.xml'):
                xml_files.append(os.path.join(root, fn))

    planned_changes = []
    new_entries = {}
    files_modified = {}
    for path in xml_files:
        with open(path, 'r', encoding='utf-8') as f:
            text = f.read()
        # Quick reject: no android: attributes at all.
        if 'android:' not in text:
            continue

        filename = os.path.splitext(os.path.basename(path))[0]
        local_changes = []
        keys_in_file = {}  # (attr, literal) -> key, to reuse keys for repeats
        pieces = []
        cursor = 0
        for m in ATTR_RE.finditer(text):
            attr = m.group(1)
            val = m.group(2)
            if not _is_extractable(val):
                continue
            key = keys_in_file.get((attr, val))
            if key is None:
                key = make_key(filename, attr, val, existing_keys)
                keys_in_file[(attr, val)] = key
                existing_keys[key] = val
                new_entries[key] = val
            # Rebuild the file from the match spans so that exactly this
            # attribute occurrence is replaced.
            pieces.append(text[cursor:m.start()])
            pieces.append(f'android:{attr}="@string/{key}"')
            cursor = m.end()
            local_changes.append((attr, val, key))

        if local_changes:
            pieces.append(text[cursor:])
            planned_changes.append((path, local_changes))
            files_modified[path] = ''.join(pieces)

    # Show report.
    ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    backup_dir = os.path.join(ROOT, 'backup', 'extracted', ts)
    print('Found', len(planned_changes), 'files with hardcoded strings.')
    print('Planned new string entries:', len(new_entries))
    for p, changes in planned_changes:
        print('-', p)
        for attr, val, key in changes:
            print(f' {attr}: "{val}" -> @string/{key}')

    # List locales.
    locales = find_locale_dirs(APP_RES)
    if not locales:
        # Create default locales.
        locales = ['values-fr', 'values-es', 'values-ar', 'values-sv']
        print('No locale directories found; will create defaults:', locales)
    else:
        print('Found locale directories:', locales)

    if args.dry_run:
        print('\nDry run only. No files modified.')
        return

    # Prepare the new strings.xml content before touching any other file, so
    # that an unusable strings.xml aborts the run without leaving layouts
    # that reference strings which were never written.
    ensure_strings_xml(strings_xml)
    new_strings_data = None
    if new_entries:
        with open(strings_xml, 'r', encoding='utf-8') as f:
            data = f.read()
        additions = '\n <!-- auto-extracted strings - {} -->\n'.format(ts) + ''.join(
            f' <string name="{k}">{_escape_string_value(v)}</string>\n'
            for k, v in new_entries.items()
        )
        new_strings_data = _insert_before_resources_end(data, additions)
        if new_strings_data is None:
            print('ERROR: no closing </resources> tag found in', strings_xml)
            sys.exit(1)

    # Apply changes, keeping a backup copy of every rewritten file.
    os.makedirs(backup_dir, exist_ok=True)
    for path, newtext in files_modified.items():
        rel = os.path.relpath(path, ROOT)
        bak = os.path.join(backup_dir, rel.replace(os.sep, '_'))
        with open(path, 'r', encoding='utf-8') as orig:
            original_text = orig.read()
        with open(bak, 'w', encoding='utf-8') as f:
            f.write(original_text)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(newtext)
        print('Modified', path, '(backup at', bak, ')')

    # Append new entries to the main strings.xml.
    if new_strings_data is not None:
        with open(strings_xml, 'w', encoding='utf-8') as f:
            f.write(new_strings_data)
        print('Updated', strings_xml)

    # Create/update locale placeholders.
    created = create_locale_placeholders(APP_RES, new_entries, locales)
    for c in created:
        print('Updated locale file', c)

    # Write summary.
    report_path = os.path.join(ROOT, 'tools', f'extract_strings_report_{ts}.txt')
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as rpt:
        rpt.write(f'Extracted at {ts}\n')
        rpt.write(f'Files modified: {len(planned_changes)}\n')
        rpt.write(f'New entries: {len(new_entries)}\n')
        for p, changes in planned_changes:
            rpt.write(p + '\n')
            for attr, val, key in changes:
                rpt.write(f' {attr}: "{val}" -> {key}\n')
    print('Wrote report to', report_path)


# Run the extractor only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment