1 min read
AI FinOps: Managing AI Costs at Scale
This post shares practical, production-minded guidance on AI FinOps: tracking AI usage per team and project, enforcing monthly budgets, spotting savings opportunities, and generating chargeback reports.
AI FinOps Implementation
from dataclasses import dataclass
from typing import Dict, List
from datetime import datetime, timedelta
import pandas as pd
@dataclass
class AIUsageRecord:
    """One billable AI call, attributed to a team and a project.

    Kept as a plain (non-slotted) dataclass so that instances expose
    ``__dict__``, which lets a list of records be turned into tabular rows
    directly.
    """

    # When the call happened.
    timestamp: datetime
    # Provider or platform that served the call.
    service: str
    # Model identifier, e.g. "gpt-4o".
    model: str
    # Token counts for the request and the response.
    input_tokens: int
    output_tokens: int
    # Amount charged for this call (currency/unit is not fixed here).
    cost: float
    # Attribution keys used for budgets and chargeback.
    team: str
    project: str
class AIFinOps:
    """Track AI usage costs, enforce team budgets, and report on spend.

    Records are persisted through ``self.usage_store``, which must provide
    ``save(record)`` and ``query(start, end)``. ``UsageStore`` is not defined
    in this part of the file and has to be supplied by the surrounding module.

    The ``analyze_*`` methods and ``get_detailed_charges`` are extension
    hooks: this class only defines what they must return; subclasses provide
    the actual analysis.
    """

    # Share of the monthly limit at which a warning alert is raised.
    DEFAULT_ALERT_THRESHOLD = 0.8

    # Model-downgrade heuristic: traffic on the source model whose average
    # complexity is below the threshold is assumed movable to the target
    # model, saving the given share of its cost.
    DOWNGRADE_SOURCE_MODEL = "gpt-4o"
    DOWNGRADE_TARGET_MODEL = "gpt-4o-mini"
    LOW_COMPLEXITY_THRESHOLD = 0.3
    DOWNGRADE_SAVINGS_RATE = 0.7

    # Minimum duplicate rate / batchable share before a suggestion is made.
    MIN_DUPLICATE_RATE = 0.1
    MIN_BATCHABLE_SHARE = 0.2

    def __init__(self):
        self.usage_store = UsageStore()
        self.budgets = {}
        self.alerts = []

    def track_usage(self, record: "AIUsageRecord") -> None:
        """Persist ``record`` and then re-evaluate its team's budget."""
        self.usage_store.save(record)
        # Checked after saving so the new record counts towards the spend.
        self.check_budget(record)

    def set_budget(
        self,
        team: str,
        monthly_budget: float,
        alert_threshold: float = DEFAULT_ALERT_THRESHOLD,
    ) -> None:
        """Set the monthly budget for ``team``.

        Args:
            team: Team the budget applies to.
            monthly_budget: Spend limit for one calendar month.
            alert_threshold: Fraction of the limit (default 0.8) at which a
                ``budget_warning`` alert is raised.
        """
        self.budgets[team] = {
            "monthly_limit": monthly_budget,
            "alert_threshold": alert_threshold,
        }

    def check_budget(self, record: "AIUsageRecord") -> None:
        """Raise an alert if the record's team is at or over its budget.

        Teams without a configured budget are ignored.
        """
        budget = self.budgets.get(record.team)
        if budget is None:
            return

        current_spend = self.get_monthly_spend(record.team)
        limit = budget["monthly_limit"]
        # NOTE(review): this runs for every tracked record, so once a team
        # crosses a threshold each further record adds another alert.
        if current_spend >= limit:
            self.create_alert("budget_exceeded", record.team, current_spend)
        elif current_spend >= limit * budget["alert_threshold"]:
            self.create_alert("budget_warning", record.team, current_spend)

    def get_monthly_spend(self, team: str) -> float:
        """Return the total cost recorded for ``team`` in the current month.

        The month is taken from ``datetime.now()`` (naive local time).
        NOTE(review): assumes stored timestamps are naive as well, and that
        filtering by team after ``query`` is acceptable -- every call scans
        the whole month's records. Confirm against the store implementation.
        """
        start, end = self._month_bounds(datetime.now())
        records = self.usage_store.query(start, end)
        return float(sum(r.cost for r in records if r.team == team))

    def create_alert(self, alert_type: str, team: str, current_spend: float) -> Dict:
        """Append a budget alert to ``self.alerts`` and return it."""
        alert = {
            "type": alert_type,
            "team": team,
            "current_spend": current_spend,
            "created_at": datetime.now(),
        }
        self.alerts.append(alert)
        return alert

    def get_cost_breakdown(self, start_date: datetime, end_date: datetime) -> Dict:
        """Return total cost plus per-dimension cost sums for a date range.

        Returns:
            A dict with ``total_cost`` (float) and the mappings ``by_team``,
            ``by_model``, ``by_project``, ``by_service`` and ``daily_trend``
            (keyed by ``datetime.date``). With no records in range the total
            is ``0.0`` and every mapping is empty.
        """
        records = list(self.usage_store.query(start_date, end_date))
        if not records:
            # An empty DataFrame has no "cost" column, so indexing it below
            # would raise KeyError; return an explicit zero breakdown.
            return {
                "total_cost": 0.0,
                "by_team": {},
                "by_model": {},
                "by_project": {},
                "by_service": {},
                "daily_trend": {},
            }

        frame = pd.DataFrame([vars(r) for r in records])

        def cost_by(key) -> Dict:
            return frame.groupby(key)["cost"].sum().to_dict()

        return {
            # Plain float rather than a numpy scalar, so it serializes cleanly.
            "total_cost": float(frame["cost"].sum()),
            "by_team": cost_by("team"),
            "by_model": cost_by("model"),
            "by_project": cost_by("project"),
            "by_service": cost_by("service"),
            "daily_trend": cost_by(frame["timestamp"].dt.date),
        }

    def identify_savings_opportunities(self) -> List[Dict]:
        """Return cost-saving suggestions, largest potential saving first.

        Each entry has ``type``, ``description`` and ``potential_savings``.
        Relies on the ``analyze_*`` hooks being implemented.
        """
        opportunities = []

        # Low-complexity traffic on the expensive model can move to the
        # cheaper one.
        for model, stats in self.analyze_model_usage().items():
            if (
                model == self.DOWNGRADE_SOURCE_MODEL
                and stats["avg_complexity"] < self.LOW_COMPLEXITY_THRESHOLD
            ):
                opportunities.append({
                    "type": "model_downgrade",
                    "description": (
                        f"Switch simple queries from {model} "
                        f"to {self.DOWNGRADE_TARGET_MODEL}"
                    ),
                    "potential_savings": stats["cost"] * self.DOWNGRADE_SAVINGS_RATE,
                })

        # Repeated prompts can be served from a cache.
        cache_analysis = self.analyze_cache_potential()
        if cache_analysis["duplicate_rate"] > self.MIN_DUPLICATE_RATE:
            opportunities.append({
                "type": "caching",
                "description": "Implement prompt caching for repeated queries",
                "potential_savings": cache_analysis["potential_savings"],
            })

        # Requests that do not need a real-time answer can be batched.
        realtime_analysis = self.analyze_realtime_requirements()
        if realtime_analysis["batchable_percent"] > self.MIN_BATCHABLE_SHARE:
            opportunities.append({
                "type": "batching",
                "description": "Batch non-urgent requests for cost efficiency",
                "potential_savings": realtime_analysis["potential_savings"],
            })

        return sorted(
            opportunities,
            key=lambda item: item["potential_savings"],
            reverse=True,
        )

    def generate_chargeback_report(self, month: datetime) -> Dict:
        """Generate the chargeback report for the calendar month of ``month``.

        Any datetime inside the target month may be passed; its day and
        time-of-day are ignored.
        """
        start, end = self._month_bounds(month)
        breakdown = self.get_cost_breakdown(start, end)
        return {
            "period": month.strftime("%Y-%m"),
            "total_cost": breakdown["total_cost"],
            "team_charges": breakdown["by_team"],
            "details": self.get_detailed_charges(start, end),
        }

    @staticmethod
    def _month_bounds(moment: datetime) -> tuple:
        """Return ``(first instant of moment's month, first instant of next month)``."""
        # Reset the time-of-day too: keeping it would skip records from the
        # early hours of the 1st and push the end bound into the next month.
        start = moment.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        # Day 1 plus 32 days always lands in the following month.
        end = (start + timedelta(days=32)).replace(day=1)
        return start, end

    # -- Extension hooks --------------------------------------------------

    def analyze_model_usage(self) -> Dict:
        """Return ``{model: {"avg_complexity": float, "cost": float}}``."""
        raise NotImplementedError("analyze_model_usage must be implemented")

    def analyze_cache_potential(self) -> Dict:
        """Return a dict with ``duplicate_rate`` and ``potential_savings``."""
        raise NotImplementedError("analyze_cache_potential must be implemented")

    def analyze_realtime_requirements(self) -> Dict:
        """Return a dict with ``batchable_percent`` and ``potential_savings``."""
        raise NotImplementedError("analyze_realtime_requirements must be implemented")

    def get_detailed_charges(self, start: datetime, end: datetime):
        """Return the line-item charges for the period ``start``..``end``."""
        raise NotImplementedError("get_detailed_charges must be implemented")
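AI FinOps provides visibility and control over AI infrastructure costs.

## Takeaways

- Record every AI call with its team, project, model, and cost so spend can be attributed.
- Give each team a monthly budget and alert before the limit is reached (here at 80%), not only once it has been exceeded.
- Review savings opportunities regularly: move simple queries to a cheaper model, cache repeated prompts, and batch requests that do not need a real-time answer.
- Next step: generate a monthly chargeback report so each team sees and owns its share of the bill.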