Skip to content

Instantly share code, notes, and snippets.

@cryptoluks
Last active August 3, 2025 18:03
Show Gist options
  • Select an option

  • Save cryptoluks/55b77b1985f11cb8bd5735117127264d to your computer and use it in GitHub Desktop.

Select an option

Save cryptoluks/55b77b1985f11cb8bd5735117127264d to your computer and use it in GitHub Desktop.
Pre-consume hook for Paperless-ngx. This script checks if a PDF is password-protected, then tries a list of passwords from a dictionary file to unlock it. When successful, it saves the PDF with a deterministic ID so duplicate detection remains accurate.
#!/usr/bin/env python3
import os
import sys
import pikepdf
def log(message):
    """
    Emit ``message`` as one line on standard output.

    Per the script's own notes, Paperless-ngx captures the hook's stdout and
    stderr, so plain printing is all the logging this script needs.
    """
    print(message, file=sys.stdout)
def is_pdf_encrypted(file_path):
    """
    Report whether the PDF at ``file_path`` needs a password to be opened.

    The file is opened with pikepdf without supplying a password:

    * it opens cleanly              -> False (not password-protected)
    * ``pikepdf.PasswordError``     -> True  (password-protected)
    * any other exception           -> the error is logged and False is returned
    """
    needs_password = False
    try:
        # Opening is the whole probe; the document itself is not inspected.
        with pikepdf.open(file_path):
            pass
    except pikepdf.PasswordError:
        needs_password = True
    except Exception as exc:
        log(f"Error during encryption check: {exc}")
    return needs_password
def unlock_pdf(file_path, dictionary_path="/usr/local/bin/scripts/passwords.txt"):
    """
    Strip password protection from the PDF at ``file_path``, in place.

    If the PDF is password-protected, every non-blank line of the dictionary
    file is tried as a password, in file order. On the first password that
    opens the document, the PDF is saved back over ``file_path`` with
    ``deterministic_id=True`` and the function returns.

    Args:
        file_path: Path of the PDF to unlock; the file is overwritten on
            success.
        dictionary_path: Text file with one candidate password per line.
            Surrounding whitespace is stripped and blank lines are ignored.
            The file is read as UTF-8.

    Returns:
        None. Every outcome (missing file, unencrypted PDF, missing or empty
        dictionary, success, no matching password, unexpected error) is
        reported through ``log`` rather than raised or returned.
    """
    # Ensure the file exists
    if not os.path.exists(file_path):
        log(f"Error: File not found: {file_path}")
        return

    # Log available environment variables for debugging
    task_id = os.environ.get("TASK_ID", "N/A")
    source_path = os.environ.get("DOCUMENT_SOURCE_PATH", "Unknown")
    log(f"Processing TASK_ID: {task_id}")
    log(f"Source file: {source_path}")
    log(f"Working file: {file_path}")

    # Only process if the PDF is encrypted
    if not is_pdf_encrypted(file_path):
        log("PDF is not password protected. Skipping password removal.")
        return
    log("PDF is encrypted. Attempting password removal...")

    # Read the password dictionary. Opening directly (instead of checking
    # existence first) avoids a window in which the file could disappear
    # between the check and the read.
    try:
        with open(dictionary_path, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            passwords = [candidate for candidate in stripped_lines if candidate]
    except FileNotFoundError:
        log("Error: Password dictionary file not found!")
        return
    if not passwords:
        log("Error: Password dictionary is empty. Nothing to try.")
        return

    # Try each candidate in turn. Passwords are deliberately never written to
    # the log: the log output is captured and stored, so only the position of
    # the candidate in the dictionary is reported.
    total = len(passwords)
    for attempt, pwd in enumerate(passwords, start=1):
        try:
            with pikepdf.open(
                file_path, password=pwd, allow_overwriting_input=True
            ) as pdf:
                log(
                    f"Success: Password #{attempt} of {total} worked. "
                    "Removing password..."
                )
                # Saving over the input path writes the document back in place.
                pdf.save(file_path, deterministic_id=True)
                log("Password removal successful.")
                return
        except pikepdf.PasswordError:
            log(f"Password #{attempt} of {total} did not work.")
        except Exception as e:
            # Anything other than a wrong password aborts the whole attempt.
            log(f"An unexpected error occurred: {e}")
            return
    log("None of the provided passwords worked. PDF remains encrypted.")
if __name__ == "__main__":
    # A PAPERLESS_PRE_CONSUME_SCRIPT is expected to modify only the file at
    # DOCUMENT_WORKING_PATH, so that is the one path handed to unlock_pdf.
    file_path = os.environ.get("DOCUMENT_WORKING_PATH")
    if file_path:
        unlock_pdf(file_path)
    else:
        # Variable missing or empty: report it and exit with a failure status.
        log("Error: DOCUMENT_WORKING_PATH environment variable is not set.")
        sys.exit(1)
@AbDhops
Copy link

AbDhops commented Apr 20, 2025

Just a suggestion: is it possible to apply a particular tag (say, "Encrypted") if the script detects a password-protected file? That would make it possible to filter those files at a later stage.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment