2 min read
Capacity Planning for AI: Scaling AI Infrastructure
AI workloads require careful capacity planning due to variable compute requirements and costs.
AI Capacity Planning
from dataclasses import dataclass
from typing import Dict, List
import numpy as np
from datetime import datetime, timedelta
@dataclass
class AIWorkload:
    """Traffic and SLA profile for a single AI workload.

    Used by AICapacityPlanner to size instance counts and costs.
    """

    name: str                      # unique workload identifier (registry key)
    avg_requests_per_hour: float   # mean request arrival rate
    avg_tokens_per_request: int    # mean tokens consumed per request
    peak_multiplier: float         # peak-to-average load ratio used for sizing
    latency_sla_ms: int            # maximum acceptable latency, milliseconds
class AICapacityPlanner:
    """Plans instance counts and costs for AI inference workloads.

    Holds a registry of workloads plus a catalog of per-model serving
    specs, and answers sizing questions: how many instances, at what
    cost, and whether a model's latency fits a workload's SLA.
    """

    def __init__(self):
        # Registered workloads, keyed by workload name.
        self.workloads: Dict[str, "AIWorkload"] = {}
        # Per-model serving characteristics (throughput, cost, latency, quality).
        self.model_specs = self.load_model_specs()

    def load_model_specs(self) -> Dict[str, Dict]:
        """Return the model catalog used for planning.

        NOTE(review): illustrative example figures — replace with real
        provider throughput/pricing data before production use.
        """
        return {
            "small": {"tpm_per_instance": 60_000, "cost_per_hour": 1.0,
                      "p50_latency_ms": 120, "quality_score": 0.70},
            "medium": {"tpm_per_instance": 30_000, "cost_per_hour": 4.0,
                       "p50_latency_ms": 250, "quality_score": 0.85},
            "large": {"tpm_per_instance": 10_000, "cost_per_hour": 12.0,
                      "p50_latency_ms": 600, "quality_score": 0.95},
        }

    def add_workload(self, workload: "AIWorkload"):
        """Register a workload under its name (overwrites an existing entry)."""
        self.workloads[workload.name] = workload

    def check_sla(self, spec: Dict, latency_sla_ms: int) -> bool:
        """True if the model's typical latency fits within the given SLA."""
        return spec["p50_latency_ms"] <= latency_sla_ms

    def calculate_required_capacity(self, workload: "AIWorkload", model: str) -> Dict:
        """Calculate instances and cost to serve `workload` on `model`.

        Returns a dict with required TPM, instance count, hourly/monthly
        cost, and an SLA check. Raises KeyError for an unknown model.
        """
        spec = self.model_specs[model]
        # Average token throughput needed (tokens/second).
        tokens_per_second = (
            workload.avg_requests_per_hour *
            workload.avg_tokens_per_request / 3600
        )
        # Size for peak, not average, so SLAs hold under burst load.
        peak_tokens_per_second = tokens_per_second * workload.peak_multiplier
        # Providers quote throughput in tokens per minute (TPM).
        required_tpm = peak_tokens_per_second * 60
        # Round up — fractional instances cannot be provisioned.
        instances = int(np.ceil(required_tpm / spec["tpm_per_instance"]))
        hourly_cost = instances * spec["cost_per_hour"]
        monthly_cost = hourly_cost * 24 * 30  # 30-day month approximation
        return {
            "workload": workload.name,
            "model": model,
            "required_tpm": required_tpm,
            "instances": instances,
            "hourly_cost": hourly_cost,
            "monthly_cost": monthly_cost,
            "meets_sla": self.check_sla(spec, workload.latency_sla_ms),
        }

    def calculate_growth_rate(self, dates: List, requests: List[float]) -> float:
        """Compound per-period growth rate implied by the request history.

        Returns 0.0 when fewer than two points exist or the series starts
        at zero — no meaningful rate can be derived in either case.
        """
        if len(requests) < 2 or requests[0] <= 0:
            return 0.0
        periods = len(requests) - 1
        return (requests[-1] / requests[0]) ** (1.0 / periods) - 1.0

    def calculate_capacity_for_requests(self, requests_per_hour: float,
                                        avg_tokens_per_request: int = 500) -> float:
        """Rough TPM needed to serve the given hourly request rate.

        NOTE(review): assumes 500 tokens/request by default — tune this
        per workload for accurate projections.
        """
        return requests_per_hour * avg_tokens_per_request / 60.0

    def forecast_growth(self, historical_data: List[Dict], months_ahead: int = 6) -> Dict:
        """Project request volume and capacity `months_ahead` months out.

        `historical_data` is a chronologically ordered list of
        {"date": ..., "requests": ...} dicts. Returns the fitted growth
        rate, monthly projections, and a recommended capacity buffer.
        """
        # Empty history: nothing to fit, return a neutral forecast
        # (the original code raised IndexError here).
        if not historical_data:
            return {"growth_rate": 0.0, "projections": [],
                    "recommended_buffer": 0.2}
        dates = [d["date"] for d in historical_data]
        requests = [d["requests"] for d in historical_data]
        # Compound growth fitted to the observed series.
        growth_rate = self.calculate_growth_rate(dates, requests)
        projections = []
        current = requests[-1]
        for month in range(1, months_ahead + 1):
            projected = current * (1 + growth_rate) ** month
            projections.append({
                "month": month,
                "projected_requests": projected,
                "required_capacity": self.calculate_capacity_for_requests(projected),
            })
        return {
            "growth_rate": growth_rate,
            "projections": projections,
            "recommended_buffer": 0.2,  # keep 20% headroom over the forecast
        }

    def optimize_model_mix(self, workloads: List["AIWorkload"], budget: float) -> Dict:
        """Rank viable models for each workload, best quality first.

        A model is viable when it meets the workload's SLA and its monthly
        cost fits the budget. NOTE(review): `budget` is applied to each
        workload independently, not to the combined monthly spend.
        """
        options = []
        for workload in workloads:
            workload_options = []
            for model in self.model_specs:
                capacity = self.calculate_required_capacity(workload, model)
                if capacity["monthly_cost"] <= budget and capacity["meets_sla"]:
                    workload_options.append({
                        "model": model,
                        "cost": capacity["monthly_cost"],
                        "quality_score": self.model_specs[model]["quality_score"],
                    })
            options.append({
                "workload": workload.name,
                # Highest quality first.
                "options": sorted(workload_options, key=lambda x: -x["quality_score"]),
            })
        return {
            "budget": budget,
            "recommendations": options,
        }
Strategic capacity planning ensures AI systems can scale efficiently within budget.