1 min read
AI Security Best Practices: Protecting AI Systems
I wrote “AI Security Best Practices: Protecting AI Systems” to share practical, production-minded guidance on this topic.
AI Security Implementation
from dataclasses import dataclass
from typing import List, Optional
import hashlib
import hmac
@dataclass
class SecurityConfig:
encrypt_prompts: bool = True
rate_limiting: bool = True
input_validation: bool = True
output_filtering: bool = True
audit_logging: bool = True
class AISecurityLayer:
def __init__(self, config: SecurityConfig):
self.config = config
self.rate_limiter = RateLimiter()
self.validator = InputValidator()
self.filter = OutputFilter()
self.audit = AuditLogger()
async def secure_request(self, request: dict, user_context: dict) -> dict:
"""Apply security controls to AI request."""
# Rate limiting
if self.config.rate_limiting:
if not self.rate_limiter.allow(user_context["user_id"]):
raise RateLimitError("Too many requests")
# Input validation
if self.config.input_validation:
validation = self.validator.validate(request)
if not validation.passed:
self.audit.log("input_validation_failed", request, validation.errors)
raise ValidationError(validation.errors)
# Prompt injection detection
if self.detect_injection(request.get("prompt", "")):
self.audit.log("injection_detected", request, user_context)
raise SecurityError("Potential prompt injection detected")
# Audit logging
if self.config.audit_logging:
self.audit.log("request_received", request, user_context)
return request
async def secure_response(self, response: str, request: dict) -> str:
"""Apply security controls to AI response."""
# Output filtering
if self.config.output_filtering:
# Remove PII
response = self.filter.remove_pii(response)
# Remove sensitive data patterns
response = self.filter.remove_sensitive(response)
# Check for harmful content
if self.filter.is_harmful(response):
self.audit.log("harmful_content_blocked", request, response)
return "I cannot provide that information."
# Audit logging
if self.config.audit_logging:
self.audit.log("response_sent", request, {"response_hash": self.hash(response)})
return response
def detect_injection(self, prompt: str) -> bool:
"""Detect potential prompt injection attacks."""
injection_patterns = [
r"ignore (?:all |previous |above )?instructions",
r"you are now",
r"new instructions:",
r"<\|.*\|>",
r"system:",
r"assistant:",
r"\[INST\]",
r"human:",
r"```\s*system"
]
import re
for pattern in injection_patterns:
if re.search(pattern, prompt.lower()):
return True
return False
def hash(self, content: str) -> str:
"""Create hash for audit purposes."""
return hashlib.sha256(content.encode()).hexdigest()[:16]
class InputValidator:
def validate(self, request: dict) -> ValidationResult:
"""Validate AI request input."""
errors = []
# Length limits
if len(request.get("prompt", "")) > 100000:
errors.append("Prompt exceeds maximum length")
# Character validation
if self.contains_control_chars(request.get("prompt", "")):
errors.append("Invalid characters in prompt")
return ValidationResult(passed=len(errors) == 0, errors=errors)
Comprehensive security protects AI systems from emerging threats.\n\n## Takeaways\n\nAdd a concise, personal takeaway and recommended next steps here.\n