
AI Security Best Practices: Protecting AI Systems

AI systems face security challenges that traditional application controls don't fully cover: prompt injection, leakage of sensitive data through model outputs, and abuse of costly inference endpoints. Here's how to protect them with layered controls.

AI Security Implementation

from dataclasses import dataclass
from typing import List
import hashlib
import re

@dataclass
class SecurityConfig:
    encrypt_prompts: bool = True
    rate_limiting: bool = True
    input_validation: bool = True
    output_filtering: bool = True
    audit_logging: bool = True

class AISecurityLayer:
    def __init__(self, config: SecurityConfig):
        self.config = config
        self.rate_limiter = RateLimiter()
        self.validator = InputValidator()
        self.filter = OutputFilter()
        self.audit = AuditLogger()

    async def secure_request(self, request: dict, user_context: dict) -> dict:
        """Apply security controls to AI request."""

        # Rate limiting
        if self.config.rate_limiting:
            if not self.rate_limiter.allow(user_context["user_id"]):
                raise RateLimitError("Too many requests")

        # Input validation
        if self.config.input_validation:
            validation = self.validator.validate(request)
            if not validation.passed:
                self.audit.log("input_validation_failed", request, validation.errors)
                raise ValidationError(validation.errors)

        # Prompt injection detection
        if self.detect_injection(request.get("prompt", "")):
            self.audit.log("injection_detected", request, user_context)
            raise SecurityError("Potential prompt injection detected")

        # Audit logging
        if self.config.audit_logging:
            self.audit.log("request_received", request, user_context)

        return request

    async def secure_response(self, response: str, request: dict) -> str:
        """Apply security controls to AI response."""

        # Output filtering
        if self.config.output_filtering:
            # Remove PII
            response = self.filter.remove_pii(response)

            # Remove sensitive data patterns
            response = self.filter.remove_sensitive(response)

            # Check for harmful content
            if self.filter.is_harmful(response):
                self.audit.log("harmful_content_blocked", request, response)
                return "I cannot provide that information."

        # Audit logging
        if self.config.audit_logging:
            self.audit.log("response_sent", request, {"response_hash": self.hash(response)})

        return response

    def detect_injection(self, prompt: str) -> bool:
        """Detect potential prompt injection attacks."""
        injection_patterns = [
            r"ignore (?:all |previous |above )?instructions",
            r"you are now",
            r"new instructions:",
            r"<\|.*\|>",
            r"system:",
            r"assistant:",
            r"\[INST\]",
            r"human:",
            r"```\s*system"
        ]

        # Search case-insensitively against the raw prompt; lowercasing the
        # prompt alone would never match upper-case patterns like [INST]
        for pattern in injection_patterns:
            if re.search(pattern, prompt, re.IGNORECASE):
                return True

        return False

    def hash(self, content: str) -> str:
        """Create hash for audit purposes."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]


class InputValidator:
    def validate(self, request: dict) -> "ValidationResult":
        """Validate AI request input."""
        errors = []
        prompt = request.get("prompt", "")

        # Length limits
        if len(prompt) > 100_000:
            errors.append("Prompt exceeds maximum length")

        # Character validation
        if self.contains_control_chars(prompt):
            errors.append("Invalid characters in prompt")

        return ValidationResult(passed=len(errors) == 0, errors=errors)

    def contains_control_chars(self, text: str) -> bool:
        """Reject non-printable control characters (newlines and tabs are allowed)."""
        return any(ord(ch) < 32 and ch not in "\n\r\t" for ch in text)
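
The layer above leans on a few supporting pieces that aren't shown: the result type, the exception classes, the rate limiter, the output filter, and the audit logger. Here is a minimal sketch of what they might look like; the thresholds, regexes, and in-memory storage are illustrative assumptions, not production-ready implementations.

import logging
import re
import time
from collections import defaultdict
from dataclasses import dataclass
from typing import List


@dataclass
class ValidationResult:
    passed: bool
    errors: List[str]


class RateLimitError(Exception):
    pass


class ValidationError(Exception):
    pass


class SecurityError(Exception):
    pass


class RateLimiter:
    """Naive in-memory sliding window; use Redis or an API gateway in production."""

    def __init__(self, max_requests: int = 60, window_seconds: int = 60):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self._history = defaultdict(list)

    def allow(self, user_id: str) -> bool:
        now = time.monotonic()
        recent = [t for t in self._history[user_id] if now - t < self.window_seconds]
        recent.append(now)
        self._history[user_id] = recent
        return len(recent) <= self.max_requests


class OutputFilter:
    """Rough regex-based redaction; a real deployment would use dedicated PII and moderation services."""

    PII_PATTERNS = [
        r"\b[\w.+-]+@[\w-]+\.[\w.-]+\b",   # email addresses
        r"\b\d{3}-\d{2}-\d{4}\b",          # US SSN-style identifiers
    ]
    SENSITIVE_PATTERNS = [
        r"(?i)api[_-]?key\s*[:=]\s*\S+",
        r"(?i)password\s*[:=]\s*\S+",
    ]

    def remove_pii(self, text: str) -> str:
        for pattern in self.PII_PATTERNS:
            text = re.sub(pattern, "[REDACTED]", text)
        return text

    def remove_sensitive(self, text: str) -> str:
        for pattern in self.SENSITIVE_PATTERNS:
            text = re.sub(pattern, "[REDACTED]", text)
        return text

    def is_harmful(self, text: str) -> bool:
        # Placeholder: wire in a moderation model or policy engine here
        return False


class AuditLogger:
    """Minimal audit trail via the standard logging module; ship to a SIEM in production."""

    def __init__(self):
        self._logger = logging.getLogger("ai_audit")

    def log(self, event: str, request: dict, details=None) -> None:
        self._logger.info("event=%s prompt_chars=%d details=%s",
                          event, len(request.get("prompt", "")), details)

In practice you would back the rate limiter with shared storage and replace the regex filters with purpose-built PII detection and content moderation services.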

Layering rate limiting, input validation, prompt injection detection, output filtering, and audit logging in this way gives AI systems defence in depth against both conventional abuse and AI-specific attacks.
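
To tie it together, here is a hypothetical end-to-end flow. call_model is a stand-in for whatever model client you actually use, and the exception classes come from the sketch above.

import asyncio

security_layer = AISecurityLayer(SecurityConfig())


async def call_model(prompt: str) -> str:
    # Placeholder for your real model client (OpenAI, Azure OpenAI, Bedrock, etc.)
    return f"Model output for: {prompt[:40]}"


async def handle_chat(prompt: str, user_id: str) -> str:
    request = {"prompt": prompt}
    user_context = {"user_id": user_id}

    try:
        safe_request = await security_layer.secure_request(request, user_context)
    except (RateLimitError, ValidationError, SecurityError) as exc:
        return f"Request rejected: {exc}"

    raw_response = await call_model(safe_request["prompt"])
    return await security_layer.secure_response(raw_response, safe_request)


# asyncio.run(handle_chat("Summarise our data retention policy", user_id="user-123"))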

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.