Created
November 3, 2025 02:58
-
-
Save williamzujkowski/5c97f26a169c386e822ffe9a77e48507 to your computer and use it in GitHub Desktop.
Detect and block prompt injection attempts in LLM applications
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from typing import List | |
| class PromptSecurityFilter: | |
| """Detect and block prompt injection attempts.""" | |
| def __init__(self): | |
| self.blocked_patterns = [ | |
| r"ignore previous instructions", | |
| r"disregard all prior", | |
| r"forget everything", | |
| r"new instructions:", | |
| r"system prompt", | |
| r"act as if", | |
| r"pretend you are", | |
| ] | |
| self.compiled_patterns = [re.compile(p, re.IGNORECASE) for p in self.blocked_patterns] | |
| def check_injection(self, prompt: str) -> tuple[bool, List[str]]: | |
| """Check for prompt injection patterns.""" | |
| detected = [] | |
| for pattern in self.compiled_patterns: | |
| if pattern.search(prompt): | |
| detected.append(pattern.pattern) | |
| return len(detected) > 0, detected | |
| def sanitize(self, prompt: str) -> str: | |
| """Remove suspicious content from prompts.""" | |
| sanitized = prompt | |
| for pattern in self.compiled_patterns: | |
| sanitized = pattern.sub('[FILTERED]', sanitized) | |
| return sanitized |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment