Back to Blog
2 min read

Reserved Capacity for AI: Optimizing Long-Term Costs

Reserved capacity for AI services can significantly reduce costs. Here’s when and how to use it.

Reserved Capacity Strategy

from dataclasses import dataclass
from typing import Dict, List
import numpy as np

@dataclass
class ReservationOption:
    """One purchasable reserved-capacity offer.

    Field order is part of the constructor signature and must not change.
    """

    # Length of the commitment, in months.
    term_months: int
    # Discount attached to this offer, as a percentage.
    # NOTE(review): presumably relative to pay-as-you-go pricing -- confirm.
    discount_percent: float
    # Committed throughput, in tokens per minute (not per day).
    commitment_tpm: int
    # Recurring monthly charge for holding this reservation.
    monthly_cost: float

class AIReservationPlanner:
    """Plan reserved-capacity purchases from historical daily token usage.

    NOTE(review): this listing reads ``self.payg_rate`` and calls several
    helpers (``load_options``, ``get_usage_data``, ``calculate_growth_trend``,
    ``detect_seasonality``, ``calculate_payback``, ``analyze_risk``,
    ``calculate_reserved_cost``, ``calculate_hybrid_cost``,
    ``calculate_hybrid_savings``) that are not defined here. They must be
    supplied elsewhere (e.g. by a subclass) before the class is usable.
    """

    # Reservation commitments are expressed in tokens per minute, while every
    # usage statistic in this class is a daily token count. This factor is
    # used to bring commitments onto the daily scale before comparing them.
    _MINUTES_PER_DAY = 24 * 60

    def __init__(self):
        self.usage_history = []
        self.reservation_options = self.load_options()

    @classmethod
    def _daily_commitment(cls, option: "ReservationOption") -> float:
        """Return the option's committed throughput in tokens per day."""
        return option.commitment_tpm * cls._MINUTES_PER_DAY

    def analyze_usage_pattern(self, days: int = 90) -> Dict:
        """Summarise recent usage for reservation planning.

        Args:
            days: Size of the look-back window handed to ``get_usage_data``.

        Returns:
            A dict with the mean and the 50th/90th/99th percentiles of
            ``usage["daily_tokens"]``, plus the results of
            ``calculate_growth_trend`` and ``detect_seasonality``.
        """
        usage = self.get_usage_data(days)
        daily_tokens = usage["daily_tokens"]

        return {
            "avg_daily_tokens": np.mean(daily_tokens),
            "p50_daily_tokens": np.percentile(daily_tokens, 50),
            "p90_daily_tokens": np.percentile(daily_tokens, 90),
            "p99_daily_tokens": np.percentile(daily_tokens, 99),
            "growth_trend": self.calculate_growth_trend(usage),
            "seasonality": self.detect_seasonality(usage)
        }

    def recommend_reservation(self, analysis: Dict, risk_tolerance: str = "medium") -> Dict:
        """Recommend the reservation option with the highest positive savings.

        Args:
            analysis: Output of ``analyze_usage_pattern``.
            risk_tolerance: ``"low"`` commits to the median day, ``"medium"``
                to the average day; any other value commits to the 90th
                percentile day.

        Returns:
            A dict with the projected daily baseline, the chosen option (or
            ``None``), its estimated savings, payback period and risk
            analysis. When no option qualifies, ``recommended_option``,
            ``payback_period_months`` and ``risk_analysis`` are ``None`` and
            ``estimated_monthly_savings`` is ``0``.
        """
        # Determine baseline commitment level
        if risk_tolerance == "low":
            baseline = analysis["p50_daily_tokens"]
        elif risk_tolerance == "medium":
            baseline = analysis["avg_daily_tokens"]
        else:
            baseline = analysis["p90_daily_tokens"]

        # Account for growth
        projected_baseline = baseline * (1 + analysis["growth_trend"])

        # Find best reservation option
        best_option = None
        best_savings = 0

        for option in self.reservation_options:
            # Compare like with like: the commitment is per minute, the
            # baseline is per day. Skip options that commit to more than the
            # projected baseline.
            if self._daily_commitment(option) > projected_baseline:
                continue
            savings = self.calculate_savings(option, projected_baseline)
            if savings > best_savings:
                best_savings = savings
                best_option = option

        if best_option is None:
            # Nothing qualified (or nothing saves money): do not hand None to
            # helpers that expect a real option.
            payback = None
            risk = None
        else:
            payback = self.calculate_payback(best_option, best_savings)
            risk = self.analyze_risk(best_option, analysis)

        return {
            "recommended_commitment": projected_baseline,
            "recommended_option": best_option,
            "estimated_monthly_savings": best_savings,
            "payback_period_months": payback,
            "risk_analysis": risk
        }

    def calculate_savings(self, option: "ReservationOption", usage: float) -> float:
        """Calculate savings of a reservation versus pure pay-as-you-go.

        Args:
            option: Reservation being evaluated.
            usage: Expected usage in tokens per day (this is what
                ``recommend_reservation`` passes).

        Returns:
            Pay-as-you-go cost minus (reservation cost + overage cost).
            Negative when the reservation is more expensive.

        NOTE(review): ``usage * self.payg_rate`` is subtracted from
        ``option.monthly_cost``, so ``payg_rate`` presumably prices a daily
        token volume per month -- confirm the unit of ``payg_rate``.
        """
        # Pay-as-you-go cost
        payg_cost = usage * self.payg_rate

        # Reserved cost
        reserved_cost = option.monthly_cost

        # Overage cost (usage above commitment at PAYG rate); the commitment
        # is converted from tokens/minute to tokens/day to match ``usage``.
        overage = max(0, usage - self._daily_commitment(option))
        overage_cost = overage * self.payg_rate

        return payg_cost - (reserved_cost + overage_cost)

    def hybrid_strategy(self, analysis: Dict) -> Dict:
        """Design a hybrid reserved + on-demand strategy.

        The median day is reserved; the gap between the 90th percentile day
        and the median day is treated as on-demand load.

        Args:
            analysis: Output of ``analyze_usage_pattern``.

        Returns:
            A dict describing the reserved part, the on-demand part, the
            total expected cost and the savings versus pure pay-as-you-go.
        """
        # Reserve for baseline load
        baseline_commitment = analysis["p50_daily_tokens"]

        # Use on-demand for variable load
        variable_load = analysis["p90_daily_tokens"] - baseline_commitment

        return {
            "reserved": {
                "commitment": baseline_commitment,
                "expected_cost": self.calculate_reserved_cost(baseline_commitment)
            },
            "on_demand": {
                "expected_usage": variable_load,
                "expected_cost": variable_load * self.payg_rate
            },
            "total_expected_cost": self.calculate_hybrid_cost(baseline_commitment, variable_load),
            "vs_pure_payg_savings": self.calculate_hybrid_savings(analysis)
        }

Strategic use of reserved capacity can reduce AI costs by 30-50% for predictable workloads.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.