Forked from williamzujkowski/prompt-security-filter.py
Created
December 3, 2025 12:08
-
-
Save adampielak/c2d844cf5bc39c5596bee841e81c6aab to your computer and use it in GitHub Desktop.
Detect and block prompt injection attempts in LLM applications
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from typing import List | |
| class PromptSecurityFilter: | |
| """Detect and block prompt injection attempts.""" | |
| def __init__(self): | |
| self.blocked_patterns = [ | |
| r"ignore previous instructions", | |
| r"disregard all prior", | |
| r"forget everything", | |
| r"new instructions:", | |
| r"system prompt", | |
| r"act as if", | |
| r"pretend you are", | |
| ] | |
| self.compiled_patterns = [re.compile(p, re.IGNORECASE) for p in self.blocked_patterns] | |
| def check_injection(self, prompt: str) -> tuple[bool, List[str]]: | |
| """Check for prompt injection patterns.""" | |
| detected = [] | |
| for pattern in self.compiled_patterns: | |
| if pattern.search(prompt): | |
| detected.append(pattern.pattern) | |
| return len(detected) > 0, detected | |
| def sanitize(self, prompt: str) -> str: | |
| """Remove suspicious content from prompts.""" | |
| sanitized = prompt | |
| for pattern in self.compiled_patterns: | |
| sanitized = pattern.sub('[FILTERED]', sanitized) | |
| return sanitized |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment