Skip to content
Back to Blog
1 min read

Enterprise AI: Building Secure API Gateways for LLM Access

I wrote “Enterprise AI: Building Secure API Gateways for LLM Access” to share practical, production-minded guidance on this topic.

Gateway Architecture

from dataclasses import dataclass
from typing import Dict, Optional, List
from datetime import datetime, timedelta
import hashlib
import asyncio

@dataclass
class APIPolicy:
    """Per-client access policy that ``LLMGateway.process_request`` enforces."""

    # Upper bound that ClientContext.requests_this_minute is compared against.
    rate_limit_per_minute: int
    # Upper bound on ClientContext.tokens_used_today plus the estimated
    # tokens of the incoming request.
    daily_token_budget: int
    # Model names the client may request; any other model is rejected with 403.
    allowed_models: List[str]
    # When true, the request's messages are passed to the gateway's content filter.
    content_filtering_enabled: bool
    # When true, the request is run through the gateway's PII detection step.
    pii_detection_enabled: bool

@dataclass
class ClientContext:
    """Identity, policy and running usage counters for one gateway client."""

    # Identifier written to each request-log entry.
    client_id: str
    # Department written to each request-log entry; billing reports group by it.
    department: str
    # Limits and feature switches applied to this client's requests.
    policy: APIPolicy
    # Running token total; the gateway adds the backend-reported usage after
    # every forwarded request.
    tokens_used_today: int = 0
    # Running request count; incremented by LLMGateway._check_rate_limit on
    # every call, including calls that end up rejected.
    requests_this_minute: int = 0

class LLMGateway:
    """Policy-enforcing gateway in front of an LLM chat-completions backend.

    ``process_request`` applies, in order: authentication, rate limiting,
    daily token budget, model authorization, content filtering and PII
    detection. Requests that pass are forwarded to the backend, and the
    reported token usage is recorded on the client and in ``request_log``.

    NOTE(review): ``_authenticate``, ``_detect_pii`` and
    ``_log_blocked_request`` are called by ``process_request`` but are not
    defined in this class as shown. They must be supplied (for example by a
    subclass or mixin) before the gateway can serve requests.
    """

    # Model used when the incoming request does not name one.
    DEFAULT_MODEL = "gpt-4o"
    # Length of the fixed window over which requests_this_minute is counted.
    RATE_LIMIT_WINDOW = timedelta(minutes=1)

    def __init__(self, backend_client, content_filter):
        """Store collaborators and initialise the in-memory tracking state.

        Args:
            backend_client: Object exposing an awaitable
                ``chat.completions.create(**request)``.
            content_filter: Object exposing an awaitable ``check(messages)``
                that returns a mapping with a ``"safe"`` key.
        """
        self.backend_client = backend_client
        self.content_filter = content_filter
        self.clients: Dict[str, "ClientContext"] = {}
        self.request_log: List[Dict] = []
        # client_id -> start of that client's current rate-limit window.
        self._rate_window_start: Dict[str, datetime] = {}
        # client_id -> calendar date on which usage was last checked.
        self._usage_day = {}

    async def process_request(self, api_key: str, request: Dict) -> Dict:
        """Process LLM request through gateway policies.

        Args:
            api_key: Credential passed to ``_authenticate``.
            request: Keyword arguments for the backend's chat-completions
                call; ``"model"`` and ``"messages"`` are read here.

        Returns:
            On rejection, a dict with ``"error"`` and ``"status"`` (plus
            ``"details"`` for PII hits). On success, a dict with the backend
            ``"response"`` and a ``"usage"`` summary.
        """

        # Authenticate and get client context
        client = self._authenticate(api_key)
        if not client:
            return {"error": "Invalid API key", "status": 401}

        # Rate limiting
        if not self._check_rate_limit(client):
            return {"error": "Rate limit exceeded", "status": 429}

        # Token budget check (start from zero if the day has changed)
        self._roll_daily_usage(client)
        estimated_tokens = self._estimate_tokens(request)
        if client.tokens_used_today + estimated_tokens > client.policy.daily_token_budget:
            return {"error": "Daily token budget exceeded", "status": 429}

        # Model authorization
        model = request.get("model", self.DEFAULT_MODEL)
        if model not in client.policy.allowed_models:
            return {"error": f"Model {model} not authorized", "status": 403}

        # Content filtering
        if client.policy.content_filtering_enabled:
            filter_result = await self.content_filter.check(request.get("messages", []))
            if not filter_result["safe"]:
                self._log_blocked_request(client, request, filter_result)
                return {"error": "Content policy violation", "status": 400}

        # PII detection
        if client.policy.pii_detection_enabled:
            pii_result = await self._detect_pii(request)
            if pii_result["pii_detected"]:
                return {"error": "PII detected in request", "status": 400,
                        "details": pii_result["types"]}

        # Forward to backend. The model that was authorized above is made
        # explicit so a defaulted model is the one actually sent and billed;
        # the caller's dict is left untouched.
        outbound = {**request, "model": model}
        response = await self.backend_client.chat.completions.create(**outbound)

        # Update usage tracking
        actual_tokens = response.usage.total_tokens
        client.tokens_used_today += actual_tokens

        # Log for billing
        self._log_request(client, outbound, response, actual_tokens)

        return {
            "response": response,
            "usage": {
                "tokens_used": actual_tokens,
                "daily_remaining": client.policy.daily_token_budget - client.tokens_used_today
            }
        }

    def _check_rate_limit(self, client: "ClientContext") -> bool:
        """Count this request and report whether the client is within its limit.

        Requests are counted in fixed windows of ``RATE_LIMIT_WINDOW``. When
        the client's window has elapsed, the counter restarts, so a client
        that hit the limit is admitted again in the next window.
        """
        now = datetime.now()
        started = self._rate_window_start.get(client.client_id)
        if started is None:
            # First request seen for this client: open a window but keep any
            # count already present on the context.
            self._rate_window_start[client.client_id] = now
        elif now - started >= self.RATE_LIMIT_WINDOW:
            self._rate_window_start[client.client_id] = now
            client.requests_this_minute = 0

        client.requests_this_minute += 1
        return client.requests_this_minute <= client.policy.rate_limit_per_minute

    def _roll_daily_usage(self, client: "ClientContext") -> None:
        """Reset ``tokens_used_today`` when the local calendar day has changed."""
        today = datetime.now().date()
        last_seen = self._usage_day.get(client.client_id)
        if last_seen is not None and last_seen != today:
            client.tokens_used_today = 0
        self._usage_day[client.client_id] = today

    @staticmethod
    def _message_text(message) -> str:
        """Return the text carried by one chat message, or "" if there is none.

        Accepts string content as well as a list of content parts, from which
        the string ``"text"`` fields are taken.
        NOTE(review): the list-of-parts shape is assumed to follow the
        backend's multimodal message format; confirm against the backend.
        """
        content = message.get("content") if isinstance(message, dict) else None
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return " ".join(
                part["text"] for part in content
                if isinstance(part, dict) and isinstance(part.get("text"), str)
            )
        return ""

    def _estimate_tokens(self, request: Dict) -> int:
        """Estimate token count for budget checking.

        Uses roughly four characters per token over the space-joined message
        text. Missing, ``None`` or non-text content contributes no characters.
        """
        messages = request.get("messages") or []
        text = " ".join(self._message_text(m) for m in messages)
        return len(text) // 4  # Rough estimate

    def _log_request(self, client: "ClientContext", request: Dict,
                     response: object, tokens: int):
        """Log request for billing and audit.

        Appends one entry to ``request_log``. The timestamp is a naive local
        ISO-8601 string; latency is read from ``response._response_ms`` when
        that attribute exists, otherwise 0.
        """
        self.request_log.append({
            "timestamp": datetime.now().isoformat(),
            "client_id": client.client_id,
            "department": client.department,
            "model": request.get("model"),
            "tokens": tokens,
            "latency_ms": getattr(response, "_response_ms", 0)
        })

Cost Allocation by Department

def generate_billing_report(gateway: LLMGateway, period_start: datetime) -> Dict:
    """Summarise token usage and request counts per department.

    Only entries of ``gateway.request_log`` whose ISO-8601 ``"timestamp"`` is
    at or after ``period_start`` are counted.

    Args:
        gateway: Gateway whose ``request_log`` is read.
        period_start: Inclusive start of the billing period.

    Returns:
        Mapping of department name to ``{"tokens": int, "requests": int}``.
    """
    report = {}

    for entry in gateway.request_log:
        logged_at = datetime.fromisoformat(entry["timestamp"])
        if logged_at < period_start:
            # Entry predates the billing period.
            continue

        totals = report.setdefault(entry["department"], {"tokens": 0, "requests": 0})
        totals["tokens"] += entry["tokens"]
        totals["requests"] += 1

    return report

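A well-designed gateway enables enterprise-wide AI adoption while maintaining security, compliance, and cost control.

## Takeaways

Put every LLM call behind a single gateway that handles authentication, rate limiting, token budgets, model authorization, content filtering, and PII detection, and log each request so usage can be billed back by department. As a next step, move the in-memory counters and request log shown here into shared, persistent storage before running more than one gateway instance.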

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.