Skip to content
Back to Blog
1 min read

Reserved Capacity for AI: Optimizing Long-Term Costs

I wrote “Reserved Capacity for AI: Optimizing Long-Term Costs” to share practical, production-minded guidance on this topic.

Reserved Capacity Strategy

from dataclasses import dataclass
from typing import Dict, List
import numpy as np

@dataclass
class ReservationOption:
    """One reserved-capacity offer that the planner can choose from.

    In the code shown here only ``commitment_tpm`` and ``monthly_cost`` are
    read by ``AIReservationPlanner``; ``term_months`` and ``discount_percent``
    are carried as plain data.
    """
    # Length of the commitment, in months.
    term_months: int
    # Discount attached to this offer.
    # NOTE(review): scale (0-100 vs 0-1) is not shown in this file - confirm.
    discount_percent: float
    commitment_tpm: int  # Tokens per minute
    # Recurring monthly price of the reservation (currency not specified here).
    monthly_cost: float

class AIReservationPlanner:
    """Plan reserved-capacity purchases from historical token usage.

    This class only contains the planning logic. It relies on collaborators
    that are not defined here and must be provided by a subclass or set on
    the instance: ``load_options``, ``get_usage_data``,
    ``calculate_growth_trend``, ``detect_seasonality``, ``calculate_payback``,
    ``analyze_risk``, ``calculate_reserved_cost``, ``calculate_hybrid_cost``,
    ``calculate_hybrid_savings`` and the ``payg_rate`` attribute.
    """

    # Usage analysis is expressed in tokens per *day*, while reservation
    # options commit to tokens per *minute*; this factor converts between them.
    MINUTES_PER_DAY = 24 * 60

    def __init__(self):
        self.usage_history = []
        self.reservation_options = self.load_options()

    def analyze_usage_pattern(self, days: int = 90) -> Dict:
        """Summarize recent usage for reservation planning.

        Args:
            days: Size of the look-back window passed to ``get_usage_data``.

        Returns:
            A dict with the mean and the 50th/90th/99th percentiles of the
            ``daily_tokens`` series, plus whatever ``calculate_growth_trend``
            and ``detect_seasonality`` return for the same usage data.
        """
        usage = self.get_usage_data(days)
        daily_tokens = usage["daily_tokens"]

        return {
            "avg_daily_tokens": np.mean(daily_tokens),
            "p50_daily_tokens": np.percentile(daily_tokens, 50),
            "p90_daily_tokens": np.percentile(daily_tokens, 90),
            "p99_daily_tokens": np.percentile(daily_tokens, 99),
            "growth_trend": self.calculate_growth_trend(usage),
            "seasonality": self.detect_seasonality(usage)
        }

    def recommend_reservation(self, analysis: Dict, risk_tolerance: str = "medium") -> Dict:
        """Pick the reservation option with the highest positive savings.

        Args:
            analysis: Mapping with ``p50_daily_tokens``, ``avg_daily_tokens``,
                ``p90_daily_tokens`` and ``growth_trend`` (a fractional
                increase applied as ``1 + growth_trend``).
            risk_tolerance: ``"low"`` commits against the median day,
                ``"medium"`` against the average day, and any other value
                against the 90th-percentile day.

        Returns:
            A dict with:
              * ``recommended_commitment`` - projected baseline in tokens/day.
              * ``recommended_commitment_tpm`` - the same baseline as an
                average tokens-per-minute rate.
              * ``recommended_option`` - the chosen option, or ``None`` when
                no option both fits under the baseline and saves money.
              * ``estimated_monthly_savings`` - savings of the chosen option
                (``0`` when there is none).
              * ``payback_period_months`` / ``risk_analysis`` - results of
                ``calculate_payback`` / ``analyze_risk``, or ``None`` when no
                option was chosen.
        """
        # Determine baseline commitment level
        baseline_key = {
            "low": "p50_daily_tokens",
            "medium": "avg_daily_tokens",
        }.get(risk_tolerance, "p90_daily_tokens")
        baseline = analysis[baseline_key]

        # Account for growth
        projected_baseline = baseline * (1 + analysis["growth_trend"])

        # Options commit to tokens per minute, so compare like with like:
        # express the projected daily volume as an average per-minute rate.
        # This is a day-average; it says nothing about intra-day peaks.
        projected_tpm = projected_baseline / self.MINUTES_PER_DAY

        # Find best reservation option
        best_option = None
        best_savings = 0

        for option in self.reservation_options:
            # Never commit to more throughput than the projected baseline.
            if option.commitment_tpm > projected_tpm:
                continue
            savings = self.calculate_savings(option, projected_tpm)
            if savings > best_savings:
                best_savings = savings
                best_option = option

        if best_option is None:
            # Nothing qualified: there is no option to compute a payback
            # period or a risk profile for, so do not hand None to helpers.
            payback_period = None
            risk_analysis = None
        else:
            payback_period = self.calculate_payback(best_option, best_savings)
            risk_analysis = self.analyze_risk(best_option, analysis)

        return {
            "recommended_commitment": projected_baseline,
            "recommended_commitment_tpm": projected_tpm,
            "recommended_option": best_option,
            "estimated_monthly_savings": best_savings,
            "payback_period_months": payback_period,
            "risk_analysis": risk_analysis
        }

    def calculate_savings(self, option: "ReservationOption", usage: float) -> float:
        """Calculate monthly savings from reservation.

        Args:
            option: Reservation being evaluated; ``commitment_tpm`` and
                ``monthly_cost`` are read.
            usage: Expected usage in the same unit as ``commitment_tpm``
                (tokens per minute), since the two are subtracted.

        Returns:
            Pay-as-you-go cost minus the reserved cost plus overage. The
            result is negative when the reservation costs more than
            pay-as-you-go.

        NOTE(review): the result is compared with ``monthly_cost``, so
        ``payg_rate`` is assumed to be a monthly price per token-per-minute
        of sustained usage - confirm against where ``payg_rate`` is set.
        """
        # Pay-as-you-go cost
        payg_cost = usage * self.payg_rate

        # Reserved cost
        reserved_cost = option.monthly_cost

        # Overage cost (usage above commitment at PAYG rate)
        overage = max(0, usage - option.commitment_tpm)
        overage_cost = overage * self.payg_rate

        return payg_cost - (reserved_cost + overage_cost)

    def hybrid_strategy(self, analysis: Dict) -> Dict:
        """Design hybrid reserved + on-demand strategy.

        Reserves the median day (``p50_daily_tokens``) and treats the gap up
        to the 90th-percentile day as on-demand load.

        NOTE(review): every quantity here is in tokens per day and is passed
        to ``calculate_reserved_cost`` / ``calculate_hybrid_cost`` and
        multiplied by ``payg_rate`` as-is; those helpers are not visible in
        this file, so their expected units should be confirmed.
        """
        # Reserve for baseline load
        baseline_commitment = analysis["p50_daily_tokens"]

        # Use on-demand for variable load
        variable_load = analysis["p90_daily_tokens"] - baseline_commitment

        return {
            "reserved": {
                "commitment": baseline_commitment,
                "expected_cost": self.calculate_reserved_cost(baseline_commitment)
            },
            "on_demand": {
                "expected_usage": variable_load,
                "expected_cost": variable_load * self.payg_rate
            },
            "total_expected_cost": self.calculate_hybrid_cost(baseline_commitment, variable_load),
            "vs_pure_payg_savings": self.calculate_hybrid_savings(analysis)
        }

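Strategic use of reserved capacity can reduce AI costs by 30-50% for predictable workloads.

## Takeaways

Reserve capacity for the baseline load you can predict (roughly your median usage), serve spikes on demand, and re-run the analysis as usage grows so the commitment keeps matching real traffic.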

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.