1 min read
Capacity Planning for AI: Scaling AI Infrastructure
I wrote “Capacity Planning for AI: Scaling AI Infrastructure” to share practical, production-minded guidance on this topic.
AI Capacity Planning
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Optional

import numpy as np
@dataclass
class AIWorkload:
    """Traffic profile and latency target for a single AI workload.

    ``AICapacityPlanner`` reads these fields to turn an hourly request rate
    into a peak token throughput and to run its SLA check.
    """

    # Identifier; the planner keys its workload registry on this value.
    name: str
    # Average number of requests received per hour.
    avg_requests_per_hour: float
    # Average number of tokens per request; multiplied by the request rate
    # to obtain average token throughput.
    avg_tokens_per_request: int
    # Factor applied to the average token throughput to obtain peak load.
    peak_multiplier: float
    # Latency target in milliseconds, handed to the planner's SLA check.
    latency_sla_ms: int
class AICapacityPlanner:
    """Size, cost and forecast model-serving capacity for AI workloads.

    The planner holds a registry of workloads and a table of per-model specs.
    Each spec is a mapping from which this class reads ``tpm_per_instance``,
    ``cost_per_hour`` and ``quality_score``; the whole spec is also handed to
    ``check_sla``.

    Four methods are extension points with no built-in behaviour:
    ``load_model_specs``, ``check_sla``, ``calculate_growth_rate`` and
    ``calculate_capacity_for_requests``.  Override them in a subclass (or pass
    ``model_specs`` to the constructor in place of ``load_model_specs``).
    """

    def __init__(self, model_specs: Optional[Dict[str, Dict]] = None):
        """Create an empty planner.

        Args:
            model_specs: Optional mapping of model name to spec.  When
                omitted, the table is obtained from ``load_model_specs()``.
        """
        self.workloads: Dict[str, AIWorkload] = {}
        self.model_specs = (
            self.load_model_specs() if model_specs is None else dict(model_specs)
        )

    def add_workload(self, workload: AIWorkload) -> None:
        """Register *workload*, replacing any entry with the same name."""
        self.workloads[workload.name] = workload

    def calculate_required_capacity(self, workload: AIWorkload, model: str) -> Dict:
        """Calculate the instances and cost needed to serve *workload* on *model*.

        Args:
            workload: Traffic profile to size for.
            model: Key into ``self.model_specs``.

        Returns:
            A dict with ``workload``, ``model``, ``required_tpm`` (peak tokens
            per minute), ``instances``, ``hourly_cost``, ``monthly_cost``
            (24 hours x 30 days) and ``meets_sla``.

        Raises:
            KeyError: If *model* is not in ``self.model_specs`` or the spec
                lacks a key read here.
        """
        spec = self.model_specs[model]

        # Average token throughput, scaled up to the peak, then per minute.
        avg_tokens_per_second = (
            workload.avg_requests_per_hour * workload.avg_tokens_per_request / 3600
        )
        peak_tokens_per_second = avg_tokens_per_second * workload.peak_multiplier
        required_tpm = peak_tokens_per_second * 60

        # np.ceil returns a NumPy float; convert to int *before* costing so
        # the cost figures are plain Python numbers, not NumPy scalars.
        instances = int(np.ceil(required_tpm / spec["tpm_per_instance"]))

        hourly_cost = instances * spec["cost_per_hour"]
        monthly_cost = hourly_cost * 24 * 30

        return {
            "workload": workload.name,
            "model": model,
            "required_tpm": required_tpm,
            "instances": instances,
            "hourly_cost": hourly_cost,
            "monthly_cost": monthly_cost,
            "meets_sla": self.check_sla(spec, workload.latency_sla_ms),
        }

    def forecast_growth(self, historical_data: List[Dict], months_ahead: int = 6) -> Dict:
        """Forecast capacity needs by compounding a growth rate month by month.

        Args:
            historical_data: Non-empty, oldest-first list of dicts, each with
                a ``date`` and a ``requests`` entry.
            months_ahead: Number of future months to project.

        Returns:
            A dict with ``growth_rate``, ``projections`` (one entry per month
            holding ``month``, ``projected_requests`` and
            ``required_capacity``) and ``recommended_buffer`` (fixed at 0.2).

        Raises:
            ValueError: If *historical_data* is empty.
        """
        if not historical_data:
            # Without this guard the failure is a bare IndexError on the
            # "latest value" lookup below.
            raise ValueError("historical_data must contain at least one data point")

        dates = [point["date"] for point in historical_data]
        requests = [point["requests"] for point in historical_data]

        growth_rate = self.calculate_growth_rate(dates, requests)

        # Projections compound from the most recent observation.
        latest = requests[-1]
        projections = []
        for month in range(1, months_ahead + 1):
            projected = latest * (1 + growth_rate) ** month
            projections.append({
                "month": month,
                "projected_requests": projected,
                "required_capacity": self.calculate_capacity_for_requests(projected),
            })

        return {
            "growth_rate": growth_rate,
            "projections": projections,
            "recommended_buffer": 0.2,  # 20% buffer
        }

    def optimize_model_mix(self, workloads: List[AIWorkload], budget: float) -> Dict:
        """List, per workload, the models that fit *budget* and meet the SLA.

        NOTE(review): *budget* is compared against each workload's monthly
        cost on its own; the summed cost across workloads is not checked.

        Args:
            workloads: Workloads to evaluate against every known model.
            budget: Monthly cost ceiling applied to each workload/model pair.

        Returns:
            A dict with ``budget`` and ``recommendations``; each
            recommendation holds the ``workload`` name and its ``options``
            sorted by descending ``quality_score``.
        """
        recommendations = []
        for workload in workloads:
            candidates = []
            for model in self.model_specs:
                capacity = self.calculate_required_capacity(workload, model)
                if capacity["monthly_cost"] <= budget and capacity["meets_sla"]:
                    candidates.append({
                        "model": model,
                        "cost": capacity["monthly_cost"],
                        "quality_score": self.model_specs[model]["quality_score"],
                    })
            recommendations.append({
                "workload": workload.name,
                "options": sorted(candidates, key=lambda opt: -opt["quality_score"]),
            })

        return {
            "budget": budget,
            "recommendations": recommendations,
        }

    # ------------------------------------------------------------------
    # Extension points: the planner calls these but cannot implement them
    # generically.  Override them in a subclass.
    # ------------------------------------------------------------------

    def load_model_specs(self) -> Dict[str, Dict]:
        """Return the model-name -> spec table used when none is injected."""
        raise NotImplementedError(
            "load_model_specs() is not implemented; pass model_specs to "
            "AICapacityPlanner(...) or override this method in a subclass"
        )

    def check_sla(self, spec: Dict, latency_sla_ms: int) -> bool:
        """Return whether a model described by *spec* meets *latency_sla_ms*.

        The result is reported as ``meets_sla`` and used as a truth value
        when filtering options in ``optimize_model_mix``.
        """
        raise NotImplementedError(
            "check_sla() is not implemented; override it in a subclass"
        )

    def calculate_growth_rate(self, dates: List, requests: List) -> float:
        """Return the growth rate derived from the historical series.

        ``forecast_growth`` compounds the value once per projected month as
        ``(1 + rate) ** month``, so it must be a fractional per-month rate.
        """
        raise NotImplementedError(
            "calculate_growth_rate() is not implemented; override it in a subclass"
        )

    def calculate_capacity_for_requests(self, projected_requests: float):
        """Return the capacity needed for *projected_requests*.

        The value is stored unchanged under ``required_capacity`` in each
        projection produced by ``forecast_growth``.
        """
        raise NotImplementedError(
            "calculate_capacity_for_requests() is not implemented; "
            "override it in a subclass"
        )
Strategic capacity planning ensures AI systems can scale efficiently within budget.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.