Skip to content
Back to Blog
1 min read

AI Security Best Practices: Protecting AI Systems

I wrote “AI Security Best Practices: Protecting AI Systems” to share practical, production-minded guidance on this topic.

AI Security Implementation

from dataclasses import dataclass
from typing import List, Optional
import hashlib
import hmac

@dataclass
class SecurityConfig:
    encrypt_prompts: bool = True
    rate_limiting: bool = True
    input_validation: bool = True
    output_filtering: bool = True
    audit_logging: bool = True

class AISecurityLayer:
    def __init__(self, config: SecurityConfig):
        self.config = config
        self.rate_limiter = RateLimiter()
        self.validator = InputValidator()
        self.filter = OutputFilter()
        self.audit = AuditLogger()

    async def secure_request(self, request: dict, user_context: dict) -> dict:
        """Apply security controls to AI request."""

        # Rate limiting
        if self.config.rate_limiting:
            if not self.rate_limiter.allow(user_context["user_id"]):
                raise RateLimitError("Too many requests")

        # Input validation
        if self.config.input_validation:
            validation = self.validator.validate(request)
            if not validation.passed:
                self.audit.log("input_validation_failed", request, validation.errors)
                raise ValidationError(validation.errors)

        # Prompt injection detection
        if self.detect_injection(request.get("prompt", "")):
            self.audit.log("injection_detected", request, user_context)
            raise SecurityError("Potential prompt injection detected")

        # Audit logging
        if self.config.audit_logging:
            self.audit.log("request_received", request, user_context)

        return request

    async def secure_response(self, response: str, request: dict) -> str:
        """Apply security controls to AI response."""

        # Output filtering
        if self.config.output_filtering:
            # Remove PII
            response = self.filter.remove_pii(response)

            # Remove sensitive data patterns
            response = self.filter.remove_sensitive(response)

            # Check for harmful content
            if self.filter.is_harmful(response):
                self.audit.log("harmful_content_blocked", request, response)
                return "I cannot provide that information."

        # Audit logging
        if self.config.audit_logging:
            self.audit.log("response_sent", request, {"response_hash": self.hash(response)})

        return response

    def detect_injection(self, prompt: str) -> bool:
        """Detect potential prompt injection attacks."""
        injection_patterns = [
            r"ignore (?:all |previous |above )?instructions",
            r"you are now",
            r"new instructions:",
            r"<\|.*\|>",
            r"system:",
            r"assistant:",
            r"\[INST\]",
            r"human:",
            r"```\s*system"
        ]

        import re
        for pattern in injection_patterns:
            if re.search(pattern, prompt.lower()):
                return True

        return False

    def hash(self, content: str) -> str:
        """Create hash for audit purposes."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]


class InputValidator:
    def validate(self, request: dict) -> ValidationResult:
        """Validate AI request input."""
        errors = []

        # Length limits
        if len(request.get("prompt", "")) > 100000:
            errors.append("Prompt exceeds maximum length")

        # Character validation
        if self.contains_control_chars(request.get("prompt", "")):
            errors.append("Invalid characters in prompt")

        return ValidationResult(passed=len(errors) == 0, errors=errors)

Comprehensive security protects AI systems from emerging threats.\n\n## Takeaways\n\nAdd a concise, personal takeaway and recommended next steps here.\n

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.