Token Cost Analysis: Understanding and Managing LLM Expenses

Understanding token economics is essential for managing LLM costs. Here’s how to analyze, predict, and control token-based expenses.

Token Basics

import tiktoken

def count_tokens(text: str, model: str = "gpt-4") -> int:
    """Count tokens for a given text."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

# Examples (exact counts depend on the model's tokenizer)
print(count_tokens("Hello, world!"))  # 4 tokens with the GPT-4 (cl100k_base) tokenizer
print(count_tokens("The quick brown fox jumps over the lazy dog."))  # ~10 tokens; typical English averages roughly 4 characters per token
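
Counting tokens up front also lets you reject or trim oversized prompts before paying for them. Here is a minimal sketch of such a guard built on count_tokens above; the MAX_INPUT_TOKENS budget is a made-up example value, not a recommendation.

# Hypothetical per-request input budget; tune it to your model's context window and cost target
MAX_INPUT_TOKENS = 4000

def check_prompt_budget(prompt: str, model: str = "gpt-4") -> int:
    """Return the prompt's token count, raising if it exceeds the budget."""
    tokens = count_tokens(prompt, model)
    if tokens > MAX_INPUT_TOKENS:
        raise ValueError(f"Prompt uses {tokens} tokens, exceeding the {MAX_INPUT_TOKENS}-token budget")
    return tokens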

Cost Calculation

class TokenCostCalculator:
    # USD per 1,000 tokens; provider pricing changes over time, so verify current rates
    PRICING = {
        "gpt-4": {"input": 0.03, "output": 0.06},
        "gpt-4-turbo": {"input": 0.01, "output": 0.03},
        "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
        "text-embedding-ada-002": {"input": 0.0001, "output": 0}
    }

    def calculate_cost(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int
    ) -> float:
        pricing = self.PRICING.get(model)
        if pricing is None:
            raise ValueError(f"Unknown model for pricing: {model}")
        return (
            input_tokens / 1000 * pricing["input"] +
            output_tokens / 1000 * pricing["output"]
        )

    def estimate_monthly_cost(
        self,
        model: str,
        daily_requests: int,
        avg_input_tokens: int,
        avg_output_tokens: int
    ) -> dict:
        daily = self.calculate_cost(
            model,
            avg_input_tokens * daily_requests,
            avg_output_tokens * daily_requests
        )
        return {
            "daily": daily,
            "monthly": daily * 30,
            "yearly": daily * 365
        }

# Example
calc = TokenCostCalculator()
costs = calc.estimate_monthly_cost("gpt-4-turbo", 10000, 500, 200)
print(f"Monthly cost: ${costs['monthly']:.2f}")
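
The same calculator makes model comparisons straightforward. The sketch below reuses the workload from the example above (10,000 requests a day, 500 input and 200 output tokens each); the volumes are illustrative, not benchmarks.

# Compare an identical workload across models
for model in ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo"]:
    monthly = calc.estimate_monthly_cost(model, 10000, 500, 200)["monthly"]
    print(f"{model}: ${monthly:,.2f}/month")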

Usage Analysis

def analyze_usage_patterns(usage_logs: list[dict]) -> dict:
    """Analyze token usage patterns."""

    if not usage_logs:
        raise ValueError("usage_logs must not be empty")

    input_counts = sorted(log["input_tokens"] for log in usage_logs)
    output_counts = sorted(log["output_tokens"] for log in usage_logs)

    total_input = sum(input_counts)
    total_output = sum(output_counts)

    input_avg = total_input / len(usage_logs)
    output_avg = total_output / len(usage_logs)

    # 95th-percentile request sizes highlight outliers
    input_p95 = input_counts[int(len(usage_logs) * 0.95)]
    output_p95 = output_counts[int(len(usage_logs) * 0.95)]

    return {
        "total_requests": len(usage_logs),
        "total_input_tokens": total_input,
        "total_output_tokens": total_output,
        "avg_input_tokens": input_avg,
        "avg_output_tokens": output_avg,
        "p95_input_tokens": input_p95,
        "p95_output_tokens": output_p95,
        "input_output_ratio": total_input / total_output if total_output else None
    }
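
A quick usage sketch, with made-up log entries standing in for real request logs:

# Example with fabricated log entries for illustration
sample_logs = [
    {"input_tokens": 450, "output_tokens": 180},
    {"input_tokens": 520, "output_tokens": 210},
    {"input_tokens": 2400, "output_tokens": 900},  # an outlier worth investigating
]
stats = analyze_usage_patterns(sample_logs)
print(f"p95 input tokens: {stats['p95_input_tokens']}")
print(f"Input/output ratio: {stats['input_output_ratio']:.1f}")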

Token Optimization Tips

optimization_strategies = {
    "prompt_compression": {
        "technique": "Remove filler words and redundant instructions",
        "savings": "20-40%"
    },
    "context_truncation": {
        "technique": "Limit context to most relevant content",
        "savings": "30-60%"
    },
    "response_limits": {
        "technique": "Set max_tokens appropriately",
        "savings": "10-30%"
    },
    "caching": {
        "technique": "Cache common responses",
        "savings": "20-50%"
    }
}
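
To make the caching strategy above concrete, here is a minimal in-memory sketch that reuses responses for identical prompts. It assumes a call_llm callable that you supply; a production setup would more likely use a shared store such as Redis with a TTL and some prompt normalization.

import hashlib

_response_cache: dict[str, str] = {}

def cached_completion(prompt: str, call_llm) -> str:
    """Return a cached response for a previously seen prompt, otherwise call the model."""
    key = hashlib.sha256(prompt.encode()).hexdigest()
    if key in _response_cache:
        return _response_cache[key]  # cache hit: no tokens spent
    response = call_llm(prompt)  # call_llm is an assumed stand-in for your LLM client
    _response_cache[key] = response
    return response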

Monitoring Dashboard

from datetime import datetime

class TokenDashboard:
    def __init__(self):
        self.usage_history = []

    def record(self, model: str, input_tokens: int, output_tokens: int, cost: float):
        self.usage_history.append({
            "timestamp": datetime.utcnow(),
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost": cost
        })

    def get_daily_summary(self) -> dict:
        today = [u for u in self.usage_history if u["timestamp"].date() == datetime.utcnow().date()]
        return {
            "total_requests": len(today),
            "total_cost": sum(u["cost"] for u in today),
            "total_tokens": sum(u["input_tokens"] + u["output_tokens"] for u in today)
        }
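
Usage is straightforward; the figures below are placeholder values consistent with the pricing table above:

# Example usage with placeholder values
dashboard = TokenDashboard()
dashboard.record("gpt-4-turbo", input_tokens=500, output_tokens=200, cost=0.011)
dashboard.record("gpt-3.5-turbo", input_tokens=300, output_tokens=150, cost=0.000375)
print(dashboard.get_daily_summary())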

Conclusion

Token cost analysis enables informed decisions about LLM usage. Track, analyze, and optimize to keep costs predictable and manageable.

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.