2 min read

Capacity Planning for AI: Scaling AI Infrastructure

AI workloads need careful capacity planning: request volumes and token throughput swing with traffic, peak load can be several times the average, and per-instance costs climb quickly if you over-provision.

AI Capacity Planning

from dataclasses import dataclass
from typing import Dict, List
import numpy as np

@dataclass
class AIWorkload:
    name: str
    avg_requests_per_hour: float
    avg_tokens_per_request: int
    peak_multiplier: float
    latency_sla_ms: int

class AICapacityPlanner:
    def __init__(self):
        self.workloads: Dict[str, AIWorkload] = {}
        self.model_specs = self.load_model_specs()
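
    def load_model_specs(self) -> Dict:
        """Return per-model capacity and cost assumptions."""
        # Hypothetical numbers for illustration only -- real throughput,
        # pricing, latency, and quality figures depend on your provider
        # and deployment.
        return {
            "large-model": {
                "tpm_per_instance": 30_000,
                "cost_per_hour": 12.0,
                "p95_latency_ms": 900,
                "quality_score": 0.95,
            },
            "small-model": {
                "tpm_per_instance": 100_000,
                "cost_per_hour": 2.5,
                "p95_latency_ms": 300,
                "quality_score": 0.80,
            },
        }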

    def add_workload(self, workload: AIWorkload):
        self.workloads[workload.name] = workload

    def calculate_required_capacity(self, workload: AIWorkload, model: str) -> Dict:
        """Calculate required capacity for workload."""
        spec = self.model_specs[model]

        # Calculate tokens per second required
        tokens_per_second = (
            workload.avg_requests_per_hour *
            workload.avg_tokens_per_request / 3600
        )

        # Account for peak load
        peak_tokens_per_second = tokens_per_second * workload.peak_multiplier

        # Calculate required TPM (tokens per minute)
        required_tpm = peak_tokens_per_second * 60

        # Calculate instances needed
        instances = np.ceil(required_tpm / spec["tpm_per_instance"])

        # Calculate cost
        hourly_cost = instances * spec["cost_per_hour"]
        monthly_cost = hourly_cost * 24 * 30

        return {
            "workload": workload.name,
            "model": model,
            "required_tpm": required_tpm,
            "instances": int(instances),
            "hourly_cost": hourly_cost,
            "monthly_cost": monthly_cost,
            "meets_sla": self.check_sla(spec, workload.latency_sla_ms)
        }
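
    def check_sla(self, spec: Dict, latency_sla_ms: int) -> bool:
        """Check whether a model's typical latency fits the workload's SLA."""
        # Simple comparison against the (illustrative) p95 latency in the spec;
        # a production check would also account for queueing under peak load.
        return spec["p95_latency_ms"] <= latency_sla_ms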

    def forecast_growth(self, historical_data: List[Dict], months_ahead: int = 6) -> Dict:
        """Forecast capacity needs based on growth."""
        # Extract time series
        dates = [d["date"] for d in historical_data]
        requests = [d["requests"] for d in historical_data]

        # Fit growth model
        growth_rate = self.calculate_growth_rate(dates, requests)

        # Project future needs
        projections = []
        current = requests[-1]

        for month in range(1, months_ahead + 1):
            projected = current * (1 + growth_rate) ** month
            projections.append({
                "month": month,
                "projected_requests": projected,
                "required_capacity": self.calculate_capacity_for_requests(projected)
            })

        return {
            "growth_rate": growth_rate,
            "projections": projections,
            "recommended_buffer": 0.2  # 20% buffer
        }
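
    def calculate_growth_rate(self, dates: List, requests: List[float]) -> float:
        """Estimate average compound growth per period from historical requests."""
        # Assumes one data point per month; a fuller implementation would use
        # the actual dates and could fit a regression or seasonal model.
        if len(requests) < 2 or requests[0] <= 0:
            return 0.0
        periods = len(requests) - 1
        return (requests[-1] / requests[0]) ** (1 / periods) - 1

    def calculate_capacity_for_requests(self, requests_per_month: float,
                                        avg_tokens_per_request: int = 1000) -> Dict:
        """Rough capacity estimate for a projected monthly request volume."""
        # Converts monthly requests into a sustained tokens-per-minute figure
        # using an assumed average request size (illustrative default).
        tokens_per_minute = requests_per_month * avg_tokens_per_request / (30 * 24 * 60)
        return {"required_tpm": tokens_per_minute}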

    def optimize_model_mix(self, workloads: List[AIWorkload], budget: float) -> Dict:
        """Optimize model selection within budget."""
        options = []

        for workload in workloads:
            workload_options = []
            for model in self.model_specs.keys():
                capacity = self.calculate_required_capacity(workload, model)
                if capacity["monthly_cost"] <= budget and capacity["meets_sla"]:
                    workload_options.append({
                        "model": model,
                        "cost": capacity["monthly_cost"],
                        "quality_score": self.model_specs[model]["quality_score"]
                    })
            options.append({
                "workload": workload.name,
                "options": sorted(workload_options, key=lambda x: -x["quality_score"])
            })

        return {
            "budget": budget,
            "recommendations": options
        }
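
A minimal sketch of how the planner might be used (the workload numbers, model names, and helper values above are illustrative assumptions, not benchmarks):

planner = AICapacityPlanner()

chatbot = AIWorkload(
    name="support-chatbot",
    avg_requests_per_hour=5000,
    avg_tokens_per_request=800,
    peak_multiplier=3.0,       # peak traffic assumed ~3x the hourly average
    latency_sla_ms=1000,
)
planner.add_workload(chatbot)

# Size one workload against a single model
plan = planner.calculate_required_capacity(chatbot, "small-model")
print(plan["instances"], plan["monthly_cost"], plan["meets_sla"])

# Choose the best-quality model per workload within a monthly budget
mix = planner.optimize_model_mix([chatbot], budget=20_000)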

Strategic capacity planning, sizing each workload, forecasting growth, and choosing models within budget, helps AI systems scale efficiently without surprise costs.

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.