Skip to content
Back to Blog
1 min read

AI Monitoring Dashboards: Visualizing AI System Health

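This post walks through a small Python class that queries Azure Monitor Logs for the core health signals of an AI system (latency, response quality, cost, error rate, and token usage) and assembles them into dashboard-ready data.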

AI Monitoring Dashboard

import asyncio
from datetime import datetime, timedelta

import pandas as pd
from azure.identity import DefaultAzureCredential
from azure.monitor.query import LogsQueryClient

class AIMonitoringDashboard:
    """Collect AI system health metrics from Azure Monitor Logs.

    Each ``get_*`` coroutine runs one KQL query against the configured Log
    Analytics workspace and returns the first result table as a
    ``pandas.DataFrame``.  ``build_dashboard_data`` runs all of them and
    returns the frames keyed by dashboard section.

    NOTE(review): the queries use the ``customMetrics`` / ``AzureDiagnostics``
    table and column names as written in the original snippet; confirm they
    match the schema exposed by your workspace.
    """

    def __init__(self, workspace_id: str):
        """Store the workspace id and create a Logs query client.

        Args:
            workspace_id: ID of the Log Analytics workspace to query.
        """
        self.workspace_id = workspace_id
        self.client = LogsQueryClient(DefaultAzureCredential())

    @staticmethod
    def _positive_int(value: int, name: str) -> int:
        """Return *value* as an int, rejecting anything that is not >= 1.

        The look-back window is interpolated into the KQL text, so it is
        coerced to ``int`` first; arbitrary strings can therefore never reach
        the query.

        Raises:
            ValueError: if *value* is not an integer >= 1.
        """
        try:
            number = int(value)
        except (TypeError, ValueError) as err:
            raise ValueError(f"{name} must be an integer, got {value!r}") from err
        if number < 1:
            raise ValueError(f"{name} must be >= 1, got {number}")
        return number

    async def execute_query(self, query: str) -> pd.DataFrame:
        """Run a KQL query against the workspace and return a DataFrame.

        ``self.client`` is the synchronous ``LogsQueryClient``, so the call is
        pushed to the default executor to avoid blocking the event loop while
        the request is in flight.

        Args:
            query: KQL query text.

        Returns:
            The first result table as a DataFrame, or an empty DataFrame when
            the service returned no tables.
        """
        loop = asyncio.get_running_loop()
        # The queries carry their own ``ago(...)`` filter, so no extra
        # timespan restriction is applied here.
        response = await loop.run_in_executor(
            None,
            lambda: self.client.query_workspace(
                self.workspace_id, query, timespan=None
            ),
        )

        # A complete result exposes ``tables``; a partial result exposes
        # ``partial_data`` instead.  Partial data is still returned so the
        # dashboard can render what is available.
        tables = getattr(response, "tables", None)
        if tables is None:
            tables = getattr(response, "partial_data", None)
        if not tables:
            return pd.DataFrame()

        table = tables[0]
        return pd.DataFrame(data=table.rows, columns=table.columns)

    async def get_latency_metrics(self, hours: int = 24) -> pd.DataFrame:
        """Get hourly latency statistics per AI endpoint.

        Args:
            hours: Look-back window in hours (>= 1).

        Raises:
            ValueError: if *hours* is not an integer >= 1.
        """
        hours = self._positive_int(hours, "hours")
        query = """
        AzureDiagnostics
        | where Category == "AIInference"
        | where TimeGenerated > ago({hours}h)
        | summarize
            avg_latency = avg(DurationMs),
            p50_latency = percentile(DurationMs, 50),
            p95_latency = percentile(DurationMs, 95),
            p99_latency = percentile(DurationMs, 99),
            request_count = count()
            by bin(TimeGenerated, 1h), EndpointName
        | order by TimeGenerated asc
        """.format(hours=hours)

        return await self.execute_query(query)

    async def get_quality_metrics(self, hours: int = 24) -> pd.DataFrame:
        """Get hourly quality scores (relevancy, faithfulness, coherence).

        Args:
            hours: Look-back window in hours (>= 1).

        Raises:
            ValueError: if *hours* is not an integer >= 1.
        """
        hours = self._positive_int(hours, "hours")
        query = """
        customMetrics
        | where name in ("ai.relevancy", "ai.faithfulness", "ai.coherence")
        | where timestamp > ago({hours}h)
        | summarize
            avg_score = avg(value),
            min_score = min(value),
            max_score = max(value)
            by bin(timestamp, 1h), name, cloud_RoleName
        | order by timestamp asc
        """.format(hours=hours)

        return await self.execute_query(query)

    async def get_cost_metrics(self, days: int = 30) -> pd.DataFrame:
        """Get daily token cost per role and model.

        Args:
            days: Look-back window in days (>= 1).

        Raises:
            ValueError: if *days* is not an integer >= 1.
        """
        days = self._positive_int(days, "days")
        # ``customDimensions.model`` is a dynamic value; KQL requires an
        # explicit cast before it can be used as a summarize group key.
        query = """
        customMetrics
        | where name == "ai.token_cost"
        | where timestamp > ago({days}d)
        | summarize
            total_cost = sum(value),
            avg_cost_per_request = avg(value)
            by bin(timestamp, 1d), cloud_RoleName, model = tostring(customDimensions.model)
        | order by timestamp asc
        """.format(days=days)

        return await self.execute_query(query)

    async def get_error_metrics(self, hours: int = 24) -> pd.DataFrame:
        """Get hourly request counts, error counts and error rate per endpoint.

        Args:
            hours: Look-back window in hours (>= 1).

        Raises:
            ValueError: if *hours* is not an integer >= 1.
        """
        hours = self._positive_int(hours, "hours")
        query = """
        AzureDiagnostics
        | where Category == "AIInference"
        | where TimeGenerated > ago({hours}h)
        | summarize
            total_requests = count(),
            error_count = countif(ResultCode >= 400),
            error_rate = todouble(countif(ResultCode >= 400)) / count() * 100
            by bin(TimeGenerated, 1h), EndpointName
        | order by TimeGenerated asc
        """.format(hours=hours)

        return await self.execute_query(query)

    async def get_token_usage(self, hours: int = 24) -> pd.DataFrame:
        """Get hourly input/output token totals per model.

        Args:
            hours: Look-back window in hours (>= 1).

        Raises:
            ValueError: if *hours* is not an integer >= 1.
        """
        hours = self._positive_int(hours, "hours")
        # Cast the dynamic ``customDimensions.model`` so it is a valid
        # summarize group key.
        query = """
        customMetrics
        | where name in ("ai.input_tokens", "ai.output_tokens")
        | where timestamp > ago({hours}h)
        | summarize
            total_tokens = sum(value)
            by bin(timestamp, 1h), name, model = tostring(customDimensions.model)
        | order by timestamp asc
        """.format(hours=hours)

        return await self.execute_query(query)

    async def get_summary_stats(self, hours: int = 24) -> pd.DataFrame:
        """Get one headline row per endpoint for the whole look-back window.

        Args:
            hours: Look-back window in hours (>= 1).

        Raises:
            ValueError: if *hours* is not an integer >= 1.
        """
        hours = self._positive_int(hours, "hours")
        query = """
        AzureDiagnostics
        | where Category == "AIInference"
        | where TimeGenerated > ago({hours}h)
        | summarize
            total_requests = count(),
            error_count = countif(ResultCode >= 400),
            avg_latency = avg(DurationMs),
            p95_latency = percentile(DurationMs, 95)
            by EndpointName
        | order by EndpointName asc
        """.format(hours=hours)

        return await self.execute_query(query)

    async def build_dashboard_data_async(self) -> dict:
        """Run every dashboard query concurrently and collect the results.

        Returns:
            Dict mapping section name ("latency", "quality", "costs",
            "errors", "tokens", "summary") to its DataFrame.
        """
        sections = {
            "latency": self.get_latency_metrics(),
            "quality": self.get_quality_metrics(),
            "costs": self.get_cost_metrics(),
            "errors": self.get_error_metrics(),
            "tokens": self.get_token_usage(),
            "summary": self.get_summary_stats(),
        }
        # gather() preserves argument order, so results line up with keys.
        frames = await asyncio.gather(*sections.values())
        return dict(zip(sections, frames))

    def build_dashboard_data(self) -> dict:
        """Build complete dashboard data from synchronous code.

        Returns:
            Dict mapping section name to its DataFrame.

        Raises:
            RuntimeError: if called while an event loop is already running;
                use ``await build_dashboard_data_async()`` there instead.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so it is safe to start one.
            return asyncio.run(self.build_dashboard_data_async())
        raise RuntimeError(
            "build_dashboard_data() cannot be called from a running event "
            "loop; use 'await build_dashboard_data_async()' instead"
        )

Comprehensive monitoring dashboards enable proactive AI operations management.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.