Skip to content
Back to Blog
1 min read

Capacity Planning for AI: Scaling AI Infrastructure

I wrote “Capacity Planning for AI: Scaling AI Infrastructure” to share practical, production-minded guidance on this topic.

AI Capacity Planning

from dataclasses import dataclass
from typing import Dict, List
import numpy as np
from datetime import datetime, timedelta

@dataclass
class AIWorkload:
    """Traffic profile and latency target describing one AI workload.

    AICapacityPlanner registers workloads keyed by ``name`` and derives the
    required token throughput from the request rate, the tokens per request
    and the peak multiplier.
    """
    # Identifier; used as the dictionary key when registered with a planner.
    name: str
    # Average number of requests per hour (the planner divides by 3600 to
    # obtain a per-second rate).
    avg_requests_per_hour: float
    # Average number of tokens consumed by a single request.
    avg_tokens_per_request: int
    # Factor applied to the average token rate to obtain the peak token rate.
    peak_multiplier: float
    # Latency target handed to the planner's SLA check; presumably in
    # milliseconds, per the field name -- confirm against check_sla.
    latency_sla_ms: int

class AICapacityPlanner:
    """Size, forecast and compare model deployments for AI workloads.

    Model specifications are looked up by model name in ``model_specs``; each
    spec is read through the keys ``"tpm_per_instance"``, ``"cost_per_hour"``
    and ``"quality_score"``.

    NOTE(review): ``load_model_specs``, ``check_sla``,
    ``calculate_growth_rate`` and ``calculate_capacity_for_requests`` are
    called by this class but are not defined in its body; presumably they are
    supplied elsewhere (subclass or mixin) -- confirm before instantiating.
    """

    def __init__(self):
        # Registered workloads, keyed by AIWorkload.name.
        self.workloads: Dict[str, AIWorkload] = {}
        # Per-model specs, as returned by the load_model_specs hook.
        self.model_specs = self.load_model_specs()

    def add_workload(self, workload: AIWorkload):
        """Register ``workload`` under its name, replacing any entry with the same name."""
        key = workload.name
        self.workloads[key] = workload

    def calculate_required_capacity(self, workload: AIWorkload, model: str) -> Dict:
        """Work out how many instances of ``model`` the workload needs at peak.

        The spec lookup happens first, so an unknown model fails before any
        arithmetic is done.

        Args:
            workload: Traffic profile to size for.
            model: Key into ``model_specs``.

        Returns:
            A dict with the keys ``"workload"``, ``"model"``,
            ``"required_tpm"``, ``"instances"``, ``"hourly_cost"``,
            ``"monthly_cost"`` and ``"meets_sla"``.
        """
        model_spec = self.model_specs[model]

        # Average token throughput: hourly token volume spread over 3600 s.
        hourly_tokens = workload.avg_requests_per_hour * workload.avg_tokens_per_request
        steady_tps = hourly_tokens / 3600

        # Scale to peak traffic, then express as tokens per minute (TPM).
        peak_tps = steady_tps * workload.peak_multiplier
        required_tpm = peak_tps * 60

        # Partial instances cannot be provisioned, so round up.
        instances = np.ceil(required_tpm / model_spec["tpm_per_instance"])

        # Monthly figure assumes every instance runs 24 h/day for 30 days.
        hourly_cost = instances * model_spec["cost_per_hour"]
        monthly_cost = hourly_cost * 24 * 30

        summary = {
            "workload": workload.name,
            "model": model,
            "required_tpm": required_tpm,
        }
        summary.update(
            instances=int(instances),
            hourly_cost=hourly_cost,
            monthly_cost=monthly_cost,
            meets_sla=self.check_sla(model_spec, workload.latency_sla_ms),
        )
        return summary

    def forecast_growth(self, historical_data: List[Dict], months_ahead: int = 6) -> Dict:
        """Project request volume and capacity for the coming months.

        Args:
            historical_data: Records providing a ``"date"`` and a
                ``"requests"`` entry each; the last record's request count is
                the baseline for the projection.
            months_ahead: Number of months to project (1 .. months_ahead).

        Returns:
            A dict with the fitted ``"growth_rate"``, the list of
            ``"projections"`` (one per month, compounding the growth rate once
            per projected month) and a fixed ``"recommended_buffer"`` of 0.2.
            The buffer is reported only; it is not applied to the projections.
        """
        observed_dates = [record["date"] for record in historical_data]
        observed_requests = [record["requests"] for record in historical_data]

        growth_rate = self.calculate_growth_rate(observed_dates, observed_requests)

        # Compound growth starting from the most recent observation.
        baseline = observed_requests[-1]
        projections = []
        for month_offset in range(1, months_ahead + 1):
            volume = baseline * (1 + growth_rate) ** month_offset
            capacity = self.calculate_capacity_for_requests(volume)
            projections.append(
                dict(
                    month=month_offset,
                    projected_requests=volume,
                    required_capacity=capacity,
                )
            )

        forecast = {"growth_rate": growth_rate, "projections": projections}
        forecast["recommended_buffer"] = 0.2  # 20% buffer
        return forecast

    def optimize_model_mix(self, workloads: List[AIWorkload], budget: float) -> Dict:
        """List, per workload, the models that fit the budget and meet the SLA.

        The budget is compared against each workload's monthly cost on its
        own; costs are not summed across workloads. A workload for which no
        model qualifies gets an empty ``"options"`` list.

        Args:
            workloads: Workloads to evaluate against every model in
                ``model_specs``.
            budget: Upper bound for a single workload's monthly cost.

        Returns:
            A dict with the ``"budget"`` and ``"recommendations"``: one entry
            per workload, whose ``"options"`` are ordered by descending
            ``"quality_score"``.
        """
        recommendations = []

        for workload in workloads:
            candidates = []
            for model, model_spec in self.model_specs.items():
                sizing = self.calculate_required_capacity(workload, model)
                affordable = sizing["monthly_cost"] <= budget
                if not (affordable and sizing["meets_sla"]):
                    continue
                candidates.append({
                    "model": model,
                    "cost": sizing["monthly_cost"],
                    "quality_score": model_spec["quality_score"],
                })

            # Highest quality first; ties keep their model_specs order.
            candidates.sort(key=lambda option: -option["quality_score"])
            recommendations.append({"workload": workload.name, "options": candidates})

        return {"budget": budget, "recommendations": recommendations}

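Strategic capacity planning ensures AI systems can scale efficiently within budget.

## Takeaways

- Size capacity for peak load, not the average: convert request volume into tokens per minute, apply the peak multiplier, and round up to whole instances.
- Forecast demand from historical request data and keep a buffer (the example recommends 20%) on top of the projection.
- For each workload, shortlist only the models that fit the budget and meet the latency SLA, then prefer the one with the highest quality score.

Next steps: plug in your own model specs and traffic history, then rerun the plan as usage grows.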

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.