2 min read
AI FinOps: Managing AI Costs at Scale
AI FinOps applies financial operations principles to AI workloads. Here’s how to implement it.
AI FinOps Implementation
from dataclasses import dataclass
from typing import Dict, List
from datetime import datetime, timedelta
import pandas as pd
@dataclass
class AIUsageRecord:
    """A single AI usage event, attributed to a team and a project.

    Instances are plain mutable dataclasses; ``AIFinOps`` persists them via
    its usage store and flattens them into a pandas DataFrame for reporting.
    """

    # When the usage occurred. NOTE(review): naive vs. timezone-aware is not
    # fixed by this class -- confirm the convention used by callers.
    timestamp: datetime
    # Name of the AI service that was called.
    service: str
    # Model identifier (the savings analysis compares it against "gpt-4o").
    model: str
    # Token counts for the request and the response respectively.
    input_tokens: int
    output_tokens: int
    # Cost of this call. NOTE(review): currency/unit is not specified here.
    cost: float
    # Owning team; budgets are keyed by this value.
    team: str
    # Project the usage is attributed to in cost breakdowns.
    project: str
class AIFinOps:
    """Track AI usage, check team budgets, and report on AI spend.

    Usage records are persisted through ``usage_store`` (which must offer
    ``save(record)`` and ``query(start, end)``); budgets are kept in memory
    in ``self.budgets`` keyed by team name.

    NOTE(review): this class calls ``get_monthly_spend``, ``create_alert``,
    ``analyze_model_usage``, ``analyze_cache_potential``,
    ``analyze_realtime_requirements`` and ``get_detailed_charges`` but does
    not define them here. They must be supplied elsewhere (for example by a
    subclass) before the methods that use them can run.
    """

    # Fraction of the monthly limit at which a warning alert is raised.
    DEFAULT_ALERT_THRESHOLD = 0.8

    def __init__(self, usage_store=None):
        """Create the tracker.

        Args:
            usage_store: Optional storage backend exposing ``save`` and
                ``query``. When omitted, a ``UsageStore()`` is created, as
                before.
        """
        self.usage_store = UsageStore() if usage_store is None else usage_store
        self.budgets = {}
        self.alerts = []

    def track_usage(self, record: "AIUsageRecord"):
        """Persist ``record`` and then check its team's budget."""
        self.usage_store.save(record)
        # The record is saved first, then the team's budget is checked.
        self.check_budget(record)

    def set_budget(
        self,
        team: str,
        monthly_budget: float,
        alert_threshold: float = DEFAULT_ALERT_THRESHOLD,
    ):
        """Set (or replace) the monthly budget for ``team``.

        Args:
            team: Team name used as the budget key.
            monthly_budget: Spend limit for one month.
            alert_threshold: Fraction of ``monthly_budget`` at which a
                ``budget_warning`` alert is raised (defaults to 80%).
        """
        self.budgets[team] = {
            "monthly_limit": monthly_budget,
            "alert_threshold": alert_threshold,
        }

    def check_budget(self, record: "AIUsageRecord"):
        """Raise an alert if the record's team is at or over its budget.

        Teams without a configured budget are ignored. A team at or above its
        limit gets a ``budget_exceeded`` alert; otherwise a team at or above
        ``limit * alert_threshold`` gets a ``budget_warning`` alert.
        """
        budget = self.budgets.get(record.team)
        if budget is None:
            return

        limit = budget["monthly_limit"]
        current_spend = self.get_monthly_spend(record.team)
        if current_spend >= limit:
            self.create_alert("budget_exceeded", record.team, current_spend)
        elif current_spend >= limit * budget["alert_threshold"]:
            self.create_alert("budget_warning", record.team, current_spend)

    def get_cost_breakdown(self, start_date: datetime, end_date: datetime) -> Dict:
        """Aggregate cost for the records returned by the usage store.

        Args:
            start_date: Lower bound passed to ``usage_store.query``.
            end_date: Upper bound passed to ``usage_store.query``.

        Returns:
            A dict with ``total_cost`` plus cost sums keyed ``by_team``,
            ``by_model``, ``by_project``, ``by_service`` and ``daily_trend``
            (keyed by calendar date). When no records match, the total is
            ``0.0`` and every grouping is an empty dict.
        """
        records = self.usage_store.query(start_date, end_date)
        df = pd.DataFrame([vars(r) for r in records])

        # An empty frame has no "cost" column, so indexing it would raise
        # KeyError; report a zeroed breakdown instead.
        if df.empty:
            return {
                "total_cost": 0.0,
                "by_team": {},
                "by_model": {},
                "by_project": {},
                "by_service": {},
                "daily_trend": {},
            }

        def cost_by(key):
            return df.groupby(key)["cost"].sum().to_dict()

        return {
            "total_cost": float(df["cost"].sum()),
            "by_team": cost_by("team"),
            "by_model": cost_by("model"),
            "by_project": cost_by("project"),
            "by_service": cost_by("service"),
            "daily_trend": cost_by(df["timestamp"].dt.date),
        }

    def identify_savings_opportunities(self) -> List[Dict]:
        """List cost optimization opportunities, largest saving first.

        Combines three analyses (model usage, cache potential, real-time
        requirements) and emits one opportunity dict per triggered rule, each
        with ``type``, ``description`` and ``potential_savings``.
        """
        opportunities = []

        # Low-complexity traffic on gpt-4o is a candidate for a cheaper model.
        for model, stats in self.analyze_model_usage().items():
            if model == "gpt-4o" and stats["avg_complexity"] < 0.3:
                opportunities.append({
                    "type": "model_downgrade",
                    "description": f"Switch simple queries from {model} to gpt-4o-mini",
                    "potential_savings": stats["cost"] * 0.7,
                })

        # More than 10% duplicate requests suggests caching would help.
        cache_analysis = self.analyze_cache_potential()
        if cache_analysis["duplicate_rate"] > 0.1:
            opportunities.append({
                "type": "caching",
                "description": "Implement prompt caching for repeated queries",
                "potential_savings": cache_analysis["potential_savings"],
            })

        # More than 20% batchable requests suggests batching would help.
        realtime_analysis = self.analyze_realtime_requirements()
        if realtime_analysis["batchable_percent"] > 0.2:
            opportunities.append({
                "type": "batching",
                "description": "Batch non-urgent requests for cost efficiency",
                "potential_savings": realtime_analysis["potential_savings"],
            })

        return sorted(
            opportunities,
            key=lambda item: item["potential_savings"],
            reverse=True,
        )

    def generate_chargeback_report(self, month: datetime) -> Dict:
        """Build a per-team chargeback report for the month containing ``month``.

        Args:
            month: Any datetime inside the month to report on.

        Returns:
            A dict with ``period`` (``YYYY-MM``), ``total_cost``,
            ``team_charges`` and ``details``.
        """
        # Snap to midnight on the 1st: replacing only the day would keep the
        # time of day, so the window would skip early records on the 1st and
        # spill into the first day of the following month.
        start = month.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        # 32 days past the 1st always lands in the next month; snapping the
        # day back to 1 gives the first instant of that month.
        end = (start + timedelta(days=32)).replace(day=1)

        breakdown = self.get_cost_breakdown(start, end)
        return {
            "period": month.strftime("%Y-%m"),
            "total_cost": breakdown["total_cost"],
            "team_charges": breakdown["by_team"],
            "details": self.get_detailed_charges(start, end),
        }
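AI FinOps provides visibility and control over AI infrastructure costs. Note that the example above assumes a `UsageStore` persistence layer and several helper methods (such as `get_monthly_spend`, `create_alert`, `get_detailed_charges`, and the `analyze_*` routines) that are not shown here and must be implemented for your environment.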