Skip to content

Instantly share code, notes, and snippets.

@exaland
Created December 5, 2025 00:27
Show Gist options
  • Select an option

  • Save exaland/67cb16346b316deda66d83b41a4ef0b9 to your computer and use it in GitHub Desktop.

Select an option

Save exaland/67cb16346b316deda66d83b41a4ef0b9 to your computer and use it in GitHub Desktop.
Simple extractor to find hardcoded strings in Android XML files and move them to res/values/strings.xml.
#!/usr/bin/env python3
"""
Simple extractor to find hardcoded strings in Android XML files and move them to res/values/strings.xml.
- Scans app/src/main/res for layout/menu/xml/etc (skips values/)
- Targets attributes: android:text, android:hint, android:contentDescription, android:title, android:label, android:summary
- Skips values already referencing @, ? or data-binding @{...}
- Adds entries to app/src/main/res/values/strings.xml and replaces literals with @string/<key>
- Creates locale placeholder values-<lang>/strings.xml for existing locales or a default set
Usage:
tools/extract_strings.py [--dry-run]
Without --dry-run the changes are written immediately; --apply is accepted but has no effect.
Note: This is a best-effort tool. Review changes before committing.
"""
import argparse
import datetime
import hashlib
import html
import os
import re
import sys
from collections import defaultdict
from xml.etree import ElementTree as ET
# Project root: the parent of the directory containing this script.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Android resource directory that gets scanned and updated.
APP_RES = os.path.join(ROOT, 'app', 'src', 'main', 'res')

# android:* attributes whose literal values are candidates for extraction.
TARGET_ATTRS = [
    'text',
    'hint',
    'contentDescription',
    'title',
    'label',
    'summary',
]

# Matches android:<attr>="<value>"; group 1 is the attribute name,
# group 2 the raw (still XML-escaped) double-quoted value.
ATTR_RE = re.compile('android:(' + '|'.join(TARGET_ATTRS) + ')="([^"]*)"')

# Values starting with one of these are references or expressions, not
# literals. ('@{' is already covered by '@'; it is kept for explicitness.)
SKIP_PREFIX = ('@', '?', '@{')
def load_existing_keys(strings_xml_path):
    """Return a ``{name: text}`` dict of the <string> entries in a strings.xml.

    Args:
        strings_xml_path: Path to the strings.xml file to read.

    Returns:
        A dict mapping each ``name`` attribute to the element's text
        content. Empty when the file does not exist.

    The file is parsed with ElementTree first. If it is not well-formed
    XML, a regex scan of the raw file content is used instead; in that
    case the values are the raw text between the tags.
    """
    keys = {}
    if not os.path.exists(strings_xml_path):
        return keys
    try:
        root = ET.parse(strings_xml_path).getroot()
    except ET.ParseError:
        # Fallback: naive regex parse of the raw file.
        with open(strings_xml_path, 'r', encoding='utf-8') as f:
            data = f.read()
        # `.*?` (not `.+?`): with re.S an empty <string></string> would
        # otherwise run on and swallow the next entry.
        for m in re.finditer(r'<string\s+name="([^"]+)">(.*?)</string>', data, re.S):
            keys[m.group(1)] = m.group(2)
        return keys
    for child in root.findall('string'):
        name = child.get('name')
        if name is None:
            # A <string> without a name cannot be referenced; do not
            # record it under a None key.
            continue
        keys[name] = ''.join(child.itertext())
    return keys
def ensure_strings_xml(path):
    """Create an empty ``<resources>`` strings.xml at *path* if none exists.

    Parent directories are created as needed. An existing file is left
    untouched.
    """
    if os.path.exists(path):
        return
    parent = os.path.dirname(path)
    os.makedirs(parent, exist_ok=True)
    skeleton = '<?xml version="1.0" encoding="utf-8"?>\n<resources>\n</resources>\n'
    with open(path, 'w', encoding='utf-8') as out:
        out.write(skeleton)
def make_key(filename, attr, text, existing_keys):
    """Build a resource key for *text* that is not yet in *existing_keys*.

    The key has the form ``str_<file>_<attr>_<hash>`` where ``<file>`` is
    *filename* lower-cased with every run of non-alphanumerics collapsed
    to ``_``, and ``<hash>`` is the first 8 hex digits of the SHA-1 of the
    stripped text. If that key is already taken, ``_1``, ``_2``, ... is
    appended until a free one is found. *existing_keys* is not modified.
    """
    slug = re.sub(r'[^0-9a-zA-Z]+', '_', filename).strip('_').lower()
    # A short digest keeps keys readable while making collisions unlikely.
    digest = hashlib.sha1(text.strip().encode('utf-8')).hexdigest()[:8]
    stem = "str_{}_{}_{}".format(slug, attr, digest)

    candidate = stem
    suffix = 0
    while candidate in existing_keys:
        suffix += 1
        candidate = "{}_{}".format(stem, suffix)
    return candidate
def find_locale_dirs(res_dir):
    """Return the sorted names of the ``values-*`` directories in *res_dir*.

    Only directory entries directly inside *res_dir* are considered; plain
    files whose name starts with ``values-`` are ignored.
    """
    return sorted(
        entry
        for entry in os.listdir(res_dir)
        if entry.startswith('values-')
        and os.path.isdir(os.path.join(res_dir, entry))
    )
def create_locale_placeholders(res_dir, new_entries, locales):
    """Add placeholder <string> entries to each locale's strings.xml.

    Args:
        res_dir: Path of the Android ``res`` directory.
        new_entries: Dict mapping string keys to their (unescaped) text.
        locales: Names of the locale directories (e.g. ``values-fr``)
            inside *res_dir* to update.

    Returns:
        The list of strings.xml paths that were actually rewritten.

    Each locale's strings.xml is created if missing. Keys the locale file
    already defines are left alone. A file is rewritten only when at least
    one key is missing, so repeated runs do not pile up empty
    "TODO" comment blocks.
    """
    created = []
    for loc in locales:
        path = os.path.join(res_dir, loc, 'strings.xml')
        ensure_strings_xml(path)
        if not new_entries:
            continue
        keys = load_existing_keys(path)
        missing = [(k, v) for k, v in new_entries.items() if k not in keys]
        if not missing:
            # Nothing to add: leave the file byte-for-byte unchanged.
            continue
        with open(path, 'r', encoding='utf-8') as f:
            data = f.read()
        insert_pos = data.rfind('</resources>')
        if insert_pos == -1:
            # No closing tag to insert before; skip rather than corrupt it.
            continue
        parts = ['\n <!-- TODO: auto-generated placeholders -->\n']
        for k, v in missing:
            # Escape the characters that are markup in XML element content.
            v_esc = v.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            parts.append(f' <string name="{k}">{v_esc}</string>\n')
        newdata = data[:insert_pos] + ''.join(parts) + data[insert_pos:]
        with open(path, 'w', encoding='utf-8') as f:
            f.write(newdata)
        created.append(path)
    return created
def _is_extractable(val):
    """Return True when the raw attribute value *val* is a literal to extract."""
    if not val or val.startswith(SKIP_PREFIX):
        return False
    # Skip data-binding and other brace expressions.
    if '@{' in val or ('}' in val and val.strip().startswith('{')):
        return False
    # Skip values that are only digits, whitespace or punctuation once
    # XML entities are decoded.
    if re.fullmatch(r'[\d\s\W]+', html.unescape(val)):
        return False
    return True


def _rewrite_literals(text, filename, existing_keys, new_entries):
    """Replace extractable literals in *text* with ``@string/<key>`` references.

    Records every new key in *existing_keys* and *new_entries* (with XML
    entities decoded) and returns ``(new_text, changes)`` where *changes*
    is a list of ``(attr, raw_value, key)`` tuples in document order.
    """
    changes = []

    def substitute(m):
        attr, val = m.group(1), m.group(2)
        if not _is_extractable(val):
            return m.group(0)
        key = make_key(filename, attr, val, existing_keys)
        # The attribute value is raw XML text; decode it so that escaping
        # it again on write does not turn "&amp;" into "&amp;amp;".
        plain = html.unescape(val)
        existing_keys[key] = plain
        new_entries[key] = plain
        changes.append((attr, val, key))
        return f'android:{attr}="@string/{key}"'

    # Substituting on the match itself rewrites exactly the matched span.
    return ATTR_RE.sub(substitute, text), changes


def main():
    """Scan the res directory, report hardcoded strings and extract them.

    With ``--dry-run`` only the report is printed and nothing on disk is
    created or changed. Otherwise the scanned XML files are rewritten
    (after being backed up under ``backup/extracted/<timestamp>``), the
    new entries are appended to ``values/strings.xml``, placeholders are
    added to the locale files, and a text report is written to ``tools/``.
    Exits with status 1 when the res directory is missing or the main
    strings.xml has no ``</resources>`` closing tag.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dry-run', action='store_true', help='Show planned changes')
    # NOTE: --apply is accepted but never read; applying is the default
    # whenever --dry-run is absent.
    parser.add_argument('--apply', action='store_true', help='Apply changes')
    args = parser.parse_args()

    if not os.path.isdir(APP_RES):
        print('ERROR: res directory not found at', APP_RES)
        sys.exit(1)

    strings_xml = os.path.join(APP_RES, 'values', 'strings.xml')
    # load_existing_keys() copes with a missing file, so strings.xml is
    # not created here; a dry run must leave the tree untouched.
    existing_keys = load_existing_keys(strings_xml)

    # Collect files to scan, skipping the values* folders themselves.
    xml_files = []
    for root, _dirs, files in os.walk(APP_RES):
        if os.path.basename(root).startswith('values'):
            continue
        xml_files.extend(os.path.join(root, fn) for fn in files if fn.endswith('.xml'))

    planned_changes = []
    new_entries = {}
    files_modified = {}  # path -> (original text, rewritten text)
    for path in xml_files:
        with open(path, 'r', encoding='utf-8') as f:
            text = f.read()
        # Cheap pre-check before running the regex.
        if 'android:' not in text:
            continue
        filename = os.path.splitext(os.path.basename(path))[0]
        repl_text, local_changes = _rewrite_literals(
            text, filename, existing_keys, new_entries)
        if local_changes:
            planned_changes.append((path, local_changes))
            files_modified[path] = (text, repl_text)

    # Show report.
    ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    backup_dir = os.path.join(ROOT, 'backup', 'extracted', ts)
    print('Found', len(planned_changes), 'files with hardcoded strings.')
    total = sum(len(changes) for _, changes in planned_changes)
    print('Planned new string entries:', total)
    for p, changes in planned_changes:
        print('-', p)
        for attr, val, key in changes:
            print(f' {attr}: "{val}" -> @string/{key}')

    # List locales.
    locales = find_locale_dirs(APP_RES)
    if not locales:
        # Create default locales.
        locales = ['values-fr', 'values-es', 'values-ar', 'values-sv']
        print('No locale directories found; will create defaults:', locales)
    else:
        print('Found locale directories:', locales)

    if args.dry_run:
        print('\nDry run only. No files modified.')
        return

    # Validate the main strings.xml before touching any other file, so a
    # malformed file cannot leave the project half-converted.
    ensure_strings_xml(strings_xml)
    with open(strings_xml, 'r', encoding='utf-8') as f:
        data = f.read()
    insert_pos = data.rfind('</resources>')
    if new_entries and insert_pos == -1:
        print('ERROR: no closing </resources> tag found in', strings_xml)
        sys.exit(1)

    # Apply changes, keeping a backup copy of every rewritten file.
    if files_modified:
        os.makedirs(backup_dir, exist_ok=True)
    for path, (original, newtext) in files_modified.items():
        rel = os.path.relpath(path, ROOT)
        bak = os.path.join(backup_dir, rel.replace(os.sep, '_'))
        with open(bak, 'w', encoding='utf-8') as f:
            f.write(original)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(newtext)
        print('Modified', path, '(backup at', bak, ')')

    # Append new entries to the main strings.xml.
    if new_entries:
        parts = ['\n <!-- auto-extracted strings - {} -->\n'.format(ts)]
        for k, v in new_entries.items():
            v_esc = v.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            parts.append(f' <string name="{k}">{v_esc}</string>\n')
        newdata = data[:insert_pos] + ''.join(parts) + data[insert_pos:]
        with open(strings_xml, 'w', encoding='utf-8') as f:
            f.write(newdata)
        print('Updated', strings_xml)

    # Create/update locale placeholders.
    created = create_locale_placeholders(APP_RES, new_entries, locales)
    for c in created:
        print('Updated locale file', c)

    # Write summary.
    report_path = os.path.join(ROOT, 'tools', f'extract_strings_report_{ts}.txt')
    # The script is not guaranteed to live in a directory named "tools".
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as rpt:
        rpt.write(f'Extracted at {ts}\n')
        rpt.write(f'Files modified: {len(planned_changes)}\n')
        rpt.write(f'New entries: {len(new_entries)}\n')
        for p, changes in planned_changes:
            rpt.write(p + '\n')
            for attr, val, key in changes:
                rpt.write(f' {attr}: "{val}" -> {key}\n')
    print('Wrote report to', report_path)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment