Back to Blog
2 min read

AI FinOps: Managing AI Costs at Scale

AI FinOps applies financial operations principles to AI workloads. Here’s how to implement it.

AI FinOps Implementation

from dataclasses import dataclass
from typing import Dict, List
from datetime import datetime, timedelta
import pandas as pd

@dataclass
class AIUsageRecord:
    """One tracked AI call, tagged with the team and project it belongs to.

    Field order is part of the positional constructor interface and must
    not be rearranged.
    """

    # When the usage occurred.
    timestamp: datetime
    # Provider / service identifier (used as a grouping key in reports).
    service: str
    # Model identifier, e.g. "gpt-4o".
    model: str
    # Token counts for the request and the response respectively.
    input_tokens: int
    output_tokens: int
    # Monetary cost of the call. NOTE(review): currency/unit is not
    # established anywhere in this file -- confirm with the producer.
    cost: float
    # Attribution keys used for budgets and chargeback.
    team: str
    project: str

class AIFinOps:
    """Track AI usage costs, enforce team budgets and produce cost reports.

    The class relies on collaborators that are not defined in this listing
    and must be supplied elsewhere (for example by a subclass or mixin):

    * ``UsageStore`` with ``save(record)`` and ``query(start, end)``
    * ``get_monthly_spend(team)`` and ``create_alert(kind, team, spend)``
    * ``analyze_model_usage()``, ``analyze_cache_potential()``,
      ``analyze_realtime_requirements()``
    * ``get_detailed_charges(start, end)``
    """

    def __init__(self):
        # UsageStore is not defined in this listing; it must be in scope
        # wherever this class is instantiated.
        self.usage_store = UsageStore()
        # team name -> {"monthly_limit": float, "alert_threshold": float}
        self.budgets = {}
        self.alerts = []

    def track_usage(self, record: "AIUsageRecord"):
        """Persist ``record`` and then evaluate the owning team's budget."""
        self.usage_store.save(record)

        # Checked after saving; whether the reported spend already includes
        # this record depends on get_monthly_spend (not shown here).
        self.check_budget(record)

    def set_budget(self, team: str, monthly_budget: float,
                   alert_threshold: float = 0.8):
        """Set (or replace) the monthly budget for ``team``.

        Args:
            team: Team name, matched against ``AIUsageRecord.team``.
            monthly_budget: Spend limit for one month.
            alert_threshold: Fraction of ``monthly_budget`` at which a
                warning is raised. Defaults to 0.8 (80%).
        """
        self.budgets[team] = {
            "monthly_limit": monthly_budget,
            "alert_threshold": alert_threshold,
        }

    def check_budget(self, record: "AIUsageRecord"):
        """Raise a budget alert for ``record.team`` if spend warrants it.

        Teams without a configured budget are ignored. At or above the
        monthly limit a ``"budget_exceeded"`` alert is created; otherwise,
        at or above ``limit * alert_threshold``, a ``"budget_warning"``.

        NOTE(review): this runs on every tracked record, so once a
        threshold is crossed ``create_alert`` is invoked for each later
        record too -- confirm that ``create_alert`` de-duplicates.
        """
        budget = self.budgets.get(record.team)
        if budget is None:
            return

        limit = budget["monthly_limit"]
        current_spend = self.get_monthly_spend(record.team)

        if current_spend >= limit:
            self.create_alert("budget_exceeded", record.team, current_spend)
        elif current_spend >= limit * budget["alert_threshold"]:
            self.create_alert("budget_warning", record.team, current_spend)

    def get_cost_breakdown(self, start_date: datetime, end_date: datetime) -> Dict:
        """Aggregate cost for the records returned by the usage store.

        Args:
            start_date: Passed through to ``usage_store.query``.
            end_date: Passed through to ``usage_store.query``; whether the
                bound is inclusive is decided by the store.

        Returns:
            A dict with ``total_cost`` (float) and the mappings ``by_team``,
            ``by_model``, ``by_project``, ``by_service`` and ``daily_trend``
            (keyed by ``datetime.date``). When no records match, the total
            is ``0.0`` and every mapping is empty.
        """
        # Materialise so that a lazy iterable can be emptiness-checked.
        records = list(self.usage_store.query(start_date, end_date))
        if not records:
            # An empty DataFrame has no "cost" column; indexing it would
            # raise KeyError, so short-circuit with a zeroed breakdown.
            return {
                "total_cost": 0.0,
                "by_team": {},
                "by_model": {},
                "by_project": {},
                "by_service": {},
                "daily_trend": {},
            }

        df = pd.DataFrame([vars(r) for r in records])

        def cost_by(key):
            # Sum of the cost column per distinct value of ``key``.
            return df.groupby(key)["cost"].sum().to_dict()

        return {
            # Plain float rather than a numpy scalar, so the result can be
            # serialised (e.g. to JSON) without extra conversion.
            "total_cost": float(df["cost"].sum()),
            "by_team": cost_by("team"),
            "by_model": cost_by("model"),
            "by_project": cost_by("project"),
            "by_service": cost_by("service"),
            "daily_trend": cost_by(df["timestamp"].dt.date),
        }

    def identify_savings_opportunities(self) -> List[Dict]:
        """Collect cost optimisation suggestions, largest saving first.

        Returns:
            A list of dicts with ``type``, ``description`` and
            ``potential_savings``, sorted by ``potential_savings``
            descending. The three ``analyze_*`` helpers that supply the
            statistics are not defined in this listing.
        """
        opportunities = []

        # Model downgrade: only "gpt-4o" usage whose average complexity is
        # below 0.3 is considered.
        model_analysis = self.analyze_model_usage()
        for model, stats in model_analysis.items():
            if stats["avg_complexity"] < 0.3 and model == "gpt-4o":
                opportunities.append({
                    "type": "model_downgrade",
                    "description": f"Switch simple queries from {model} to gpt-4o-mini",
                    # Estimated as 70% of that model's current cost.
                    "potential_savings": stats["cost"] * 0.7,
                })

        # Caching: suggested when more than 10% of queries are duplicates.
        cache_analysis = self.analyze_cache_potential()
        if cache_analysis["duplicate_rate"] > 0.1:
            opportunities.append({
                "type": "caching",
                "description": "Implement prompt caching for repeated queries",
                "potential_savings": cache_analysis["potential_savings"],
            })

        # Batching: suggested when more than 20% of requests are batchable.
        realtime_analysis = self.analyze_realtime_requirements()
        if realtime_analysis["batchable_percent"] > 0.2:
            opportunities.append({
                "type": "batching",
                "description": "Batch non-urgent requests for cost efficiency",
                "potential_savings": realtime_analysis["potential_savings"],
            })

        return sorted(
            opportunities,
            key=lambda item: item["potential_savings"],
            reverse=True,
        )

    def generate_chargeback_report(self, month: datetime) -> Dict:
        """Build a per-team chargeback report for the month of ``month``.

        Args:
            month: Any datetime inside the target month; its day and
                time-of-day are ignored.

        Returns:
            A dict with ``period`` (``"YYYY-MM"``), ``total_cost``,
            ``team_charges`` and ``details`` (from
            ``get_detailed_charges``, not defined in this listing).
        """
        # Normalise to midnight on the 1st. Keeping the caller's
        # time-of-day would shift the window, dropping early usage on the
        # 1st and pulling in early usage from the following month.
        start = month.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        # 32 days past the 1st always lands in the next month; snapping back
        # to day 1 gives the first instant of that month.
        end = (start + timedelta(days=32)).replace(day=1)

        breakdown = self.get_cost_breakdown(start, end)

        return {
            "period": month.strftime("%Y-%m"),
            "total_cost": breakdown["total_cost"],
            "team_charges": breakdown["by_team"],
            "details": self.get_detailed_charges(start, end),
        }

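AI FinOps provides visibility and control over AI infrastructure costs. Note that the listing above is a skeleton: `UsageStore` and helper methods such as `get_monthly_spend`, `create_alert`, `get_detailed_charges`, and the `analyze_*` methods are not shown and need to be implemented against your own storage and telemetry.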

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.