Skip to content

Instantly share code, notes, and snippets.

@dragon-dxw
Created September 8, 2025 13:43
Show Gist options
  • Select an option

  • Save dragon-dxw/c7b42a887e5258a3d205e1b9f5a6a551 to your computer and use it in GitHub Desktop.

Select an option

Save dragon-dxw/c7b42a887e5258a3d205e1b9f5a6a551 to your computer and use it in GitHub Desktop.
from zipfile import ZipFile
import lxml.etree
import sys
# Text written in place of every redacted attribute value / element text.
REDACTION_STRING = ""

# Prefix -> namespace URI map used both for the XPath queries and for
# building the fully qualified attribute names below.
namespaces = {
    "cp": "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
    "dc": "http://purl.org/dc/elements/1.1/",
    "w15": "http://schemas.microsoft.com/office/word/2012/wordml",
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
}

# Attributes whose value is replaced with REDACTION_STRING wherever they occur.
forbidden_attributes = ["w15:author", "w15:userId", "w:author"]
# Elements whose text is replaced with REDACTION_STRING wherever they occur.
forbidden_tags = ["cp:lastModifiedBy", "dc:creator"]


def is_xml_part(archive_filename: str) -> bool:
    """Return True if the archive member should be parsed and redacted.

    Only members whose name ends in ``.xml`` (case-insensitive) are treated
    as XML. Everything else (images, relationship files, directory entries,
    embedded binaries) is copied through untouched, because feeding such a
    member to the XML parser would raise a syntax error.
    """
    return archive_filename.lower().endswith(".xml")


def redact_xml(xml: bytes) -> bytes:
    """Return *xml* with the forbidden attributes and tags blanked out.

    Every attribute listed in ``forbidden_attributes`` has its value set to
    ``REDACTION_STRING``, and every element listed in ``forbidden_tags`` has
    its text set to ``REDACTION_STRING``.

    Raises:
        lxml.etree.XMLSyntaxError: if *xml* is not well-formed XML.
    """
    root = lxml.etree.fromstring(xml)

    for attribute in forbidden_attributes:
        prefix, _, local_name = attribute.partition(":")
        # lxml addresses namespaced attributes as "{namespace-uri}local".
        qualified_name = f"{{{namespaces[prefix]}}}{local_name}"
        for node in root.xpath(f"//*[@{attribute}]", namespaces=namespaces):
            node.set(qualified_name, REDACTION_STRING)

    for tag in forbidden_tags:
        for node in root.xpath(f"//{tag}", namespaces=namespaces):
            node.text = REDACTION_STRING

    # Serialise the whole tree (not just the root element) so the XML
    # declaration and its standalone flag are written back out instead of
    # being silently dropped.
    tree = root.getroottree()
    return lxml.etree.tostring(
        tree,
        encoding="UTF-8",
        xml_declaration=True,
        standalone=tree.docinfo.standalone,
    )


def redact_docx(filename: str) -> str:
    """Write a redacted copy of *filename* and return the copy's path.

    The copy is written next to the input as ``<filename>.out.docx``. XML
    members are passed through ``redact_xml``; all other members are copied
    byte-for-byte. Each member keeps the compression method it had in the
    input archive.
    """
    output_filename = filename + ".out.docx"
    # Open the input first: if it is missing or not a zip file we fail
    # before creating an empty output archive.
    with ZipFile(filename, "r") as archive_input, ZipFile(
        output_filename, "w"
    ) as archive_output:
        # Iterate ZipInfo objects rather than names so that duplicate member
        # names are each read from their own entry.
        for member in archive_input.infolist():
            data = archive_input.read(member)
            if is_xml_part(member.filename):
                data = redact_xml(data)
            archive_output.writestr(
                member.filename, data, compress_type=member.compress_type
            )
    return output_filename


def main(argv=None) -> int:
    """Command-line entry point.

    Args:
        argv: Arguments after the program name; defaults to ``sys.argv[1:]``.
            Only the first argument (the input path) is used.

    Returns:
        0 on success, 2 if no input path was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(f"usage: {sys.argv[0]} FILE.docx", file=sys.stderr)
        return 2
    redact_docx(args[0])
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment