Skip to content
Back to Blog
1 min read

AI FinOps: Managing AI Costs at Scale

In this post I share practical, production-minded guidance on AI FinOps: tracking AI usage costs, enforcing team budgets, and finding savings at scale.

AI FinOps Implementation

from dataclasses import dataclass
from typing import Dict, List
from datetime import datetime, timedelta
import pandas as pd

@dataclass
class AIUsageRecord:
    """A single AI usage event, attributed to a team and project.

    Instances are passed to ``AIFinOps.track_usage`` and later aggregated by
    ``AIFinOps.get_cost_breakdown``. Field meanings that the aggregation code
    does not pin down are marked as assumptions below.
    """

    timestamp: datetime  # grouped by calendar date for the daily cost trend
    service: str  # grouping key for cost; presumably the AI provider/service name -- TODO confirm
    model: str  # grouping key for cost; compared against "gpt-4o" in savings analysis
    input_tokens: int  # presumably prompt tokens consumed -- not read by the code shown
    output_tokens: int  # presumably completion tokens produced -- not read by the code shown
    cost: float  # summed in every breakdown; currency/unit is not specified here
    team: str  # key used for budget lookup and per-team charges
    project: str  # grouping key for per-project cost

class AIFinOps:
    """Track AI usage costs, enforce team budgets, and report on spend.

    This class depends on collaborators that are not defined alongside it and
    must be provided elsewhere: the ``UsageStore`` class (exposing ``save``
    and ``query``) and the methods ``get_monthly_spend``, ``create_alert``,
    ``analyze_model_usage``, ``analyze_cache_potential``,
    ``analyze_realtime_requirements`` and ``get_detailed_charges``.
    """

    def __init__(self):
        self.usage_store = UsageStore()
        # team name -> {"monthly_limit": float, "alert_threshold": float}
        self.budgets = {}
        # Not touched by the methods in this class body; presumably filled by
        # create_alert -- TODO confirm.
        self.alerts = []

    def track_usage(self, record: AIUsageRecord):
        """Persist a usage record, then re-check the owning team's budget."""
        self.usage_store.save(record)
        self.check_budget(record)

    def set_budget(self, team: str, monthly_budget: float,
                   alert_threshold: float = 0.8):
        """Set the monthly budget for a team.

        Args:
            team: Team name, matched against ``AIUsageRecord.team``.
            monthly_budget: Spend limit for one month.
            alert_threshold: Fraction of ``monthly_budget`` at which a
                warning alert is raised. Defaults to 0.8 (80%).

        Raises:
            ValueError: If ``alert_threshold`` is not in the range (0, 1].
        """
        if not 0 < alert_threshold <= 1:
            raise ValueError("alert_threshold must be in the range (0, 1]")
        self.budgets[team] = {
            "monthly_limit": monthly_budget,
            "alert_threshold": alert_threshold,
        }

    def check_budget(self, record: AIUsageRecord):
        """Raise a budget alert for the record's team when spend is too high.

        Does nothing when the team has no budget configured. Otherwise emits
        ``"budget_exceeded"`` once spend reaches the monthly limit, or
        ``"budget_warning"`` once it reaches the alert threshold.
        """
        budget = self.budgets.get(record.team)
        if budget is None:
            return

        current_spend = self.get_monthly_spend(record.team)
        limit = budget["monthly_limit"]

        if current_spend >= limit:
            self.create_alert("budget_exceeded", record.team, current_spend)
        elif current_spend >= limit * budget["alert_threshold"]:
            self.create_alert("budget_warning", record.team, current_spend)

    def get_cost_breakdown(self, start_date: datetime, end_date: datetime) -> Dict:
        """Return cost totals for the records the store yields for a period.

        Args:
            start_date: Start of the period, passed to ``usage_store.query``.
            end_date: End of the period, passed to ``usage_store.query``.

        Returns:
            A dict with ``total_cost`` plus per-key cost sums under
            ``by_team``, ``by_model``, ``by_project``, ``by_service`` and
            ``daily_trend`` (keyed by calendar date). When no records match,
            the total is 0.0 and every sub-breakdown is an empty dict.
        """
        rows = [vars(r) for r in self.usage_store.query(start_date, end_date)]
        if not rows:
            # An empty DataFrame has no "cost" column, so indexing it would
            # raise KeyError; report an explicit zero breakdown instead.
            return {
                "total_cost": 0.0,
                "by_team": {},
                "by_model": {},
                "by_project": {},
                "by_service": {},
                "daily_trend": {},
            }

        df = pd.DataFrame(rows)

        def cost_by(key):
            return df.groupby(key)["cost"].sum().to_dict()

        return {
            # Plain float rather than a NumPy scalar, so it serializes cleanly.
            "total_cost": float(df["cost"].sum()),
            "by_team": cost_by("team"),
            "by_model": cost_by("model"),
            "by_project": cost_by("project"),
            "by_service": cost_by("service"),
            "daily_trend": cost_by(df["timestamp"].dt.date),
        }

    def identify_savings_opportunities(self) -> List[Dict]:
        """Return cost optimization opportunities, largest savings first.

        Each opportunity is a dict with ``type``, ``description`` and
        ``potential_savings``.
        """
        opportunities = []

        # Model downgrade: low-complexity traffic on gpt-4o.
        for model, stats in self.analyze_model_usage().items():
            if model == "gpt-4o" and stats["avg_complexity"] < 0.3:
                opportunities.append({
                    "type": "model_downgrade",
                    "description": f"Switch simple queries from {model} to gpt-4o-mini",
                    # Savings are estimated as 70% of the model's current cost.
                    "potential_savings": stats["cost"] * 0.7,
                })

        # Caching: more than 10% of queries are duplicates.
        cache_analysis = self.analyze_cache_potential()
        if cache_analysis["duplicate_rate"] > 0.1:
            opportunities.append({
                "type": "caching",
                "description": "Implement prompt caching for repeated queries",
                "potential_savings": cache_analysis["potential_savings"],
            })

        # Batching: more than 20% of requests could be batched.
        realtime_analysis = self.analyze_realtime_requirements()
        if realtime_analysis["batchable_percent"] > 0.2:
            opportunities.append({
                "type": "batching",
                "description": "Batch non-urgent requests for cost efficiency",
                "potential_savings": realtime_analysis["potential_savings"],
            })

        return sorted(
            opportunities,
            key=lambda item: item["potential_savings"],
            reverse=True,
        )

    def generate_chargeback_report(self, month: datetime) -> Dict:
        """Generate a chargeback report for the calendar month of ``month``.

        Args:
            month: Any datetime inside the month to report on. Its day and
                time of day are ignored.

        Returns:
            A dict with ``period`` (``YYYY-MM``), ``total_cost``,
            ``team_charges`` and ``details``.
        """
        # Normalise to midnight on the 1st; keeping the caller's time of day
        # would shift the whole window and drop the first hours of the month.
        start = month.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        # 32 days past the 1st always lands in the following month.
        end = (start + timedelta(days=32)).replace(day=1)
        # NOTE(review): whether `end` is inclusive depends on UsageStore.query.

        breakdown = self.get_cost_breakdown(start, end)

        return {
            "period": month.strftime("%Y-%m"),
            "total_cost": breakdown["total_cost"],
            "team_charges": breakdown["by_team"],
            "details": self.get_detailed_charges(start, end),
        }

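AI FinOps provides visibility and control over AI infrastructure costs.

## Takeaways

- Record every AI call with its team, project, model, and cost so that spend can be attributed and charged back.
- Set a monthly budget per team and alert before the limit is reached (80% in the example above), not only after it is exceeded.
- Review usage regularly for savings: move simple queries to a smaller model, cache repeated prompts, and batch requests that are not time-sensitive.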

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.