5 min read
Materialized Views in Azure Data Explorer
Materialized Views in Azure Data Explorer
Materialized views pre-compute and store aggregated data, dramatically improving query performance for common patterns. Let’s explore how to implement materialized views for monitoring data.
What Are Materialized Views?
Materialized views:
- Pre-compute aggregations as data arrives
- Store results for fast querying
- Update automatically with new data
- Reduce compute costs for repeated queries
When to Use Materialized Views
Ideal scenarios:
- Dashboard queries that run frequently
- Aggregations over large time ranges
- Deduplication requirements
- Downsampling high-frequency data
Creating Basic Materialized Views
Hourly Aggregation
// Create materialized view for hourly metrics
.create materialized-view with (backfill=true) HourlyMetrics on table PerfMetrics
{
PerfMetrics
| summarize
AvgValue = avg(CounterValue),
MinValue = min(CounterValue),
MaxValue = max(CounterValue),
SampleCount = count()
by bin(TimeGenerated, 1h), Computer, CounterName, InstanceName
}
Error Summary
// Aggregate errors by hour and namespace
.create materialized-view ErrorSummary on table ContainerLogs
{
ContainerLogs
| where LogEntry contains "error" or LogEntry contains "exception"
| summarize
ErrorCount = count(),
FirstSeen = min(TimeGenerated),
LastSeen = max(TimeGenerated)
by bin(TimeGenerated, 1h), Namespace, PodName
}
Deduplication Views
Remove Duplicate Events
// Keep only the latest record per pod
.create materialized-view LatestPodStatus on table KubePodInventory
{
KubePodInventory
| summarize arg_max(TimeGenerated, *) by Name, Namespace
}
Dedup by Custom Key
// Deduplicate by request ID
.create materialized-view UniqueRequests on table RequestLogs
{
RequestLogs
| summarize arg_min(TimeGenerated, *) by RequestId
}
Advanced Aggregations
Multiple Aggregation Levels
// Create minute-level aggregation
.create materialized-view MinuteMetrics on table PerfMetrics
{
PerfMetrics
| summarize
AvgValue = avg(CounterValue),
P95Value = percentile(CounterValue, 95),
P99Value = percentile(CounterValue, 99)
by bin(TimeGenerated, 1m), Computer, CounterName
}
// Create hour-level from minute-level
.create materialized-view HourlyFromMinute on table MinuteMetrics
{
MinuteMetrics
| summarize
AvgValue = avg(AvgValue),
P95Value = max(P95Value),
P99Value = max(P99Value)
by bin(TimeGenerated, 1h), Computer, CounterName
}
Cardinality Estimation
// Count unique values efficiently
.create materialized-view UniqueUsers on table RequestLogs
{
RequestLogs
| summarize
UniqueUsers = dcount(UserId),
TotalRequests = count()
by bin(TimeGenerated, 1h), Endpoint
}
Querying Materialized Views
Direct Query
// Query the materialized view directly
HourlyMetrics
| where TimeGenerated > ago(7d)
| where CounterName == "cpuUsageNanoCores"
| render timechart
Using materialized_view() Function
// Explicitly query materialized portion
materialized_view('HourlyMetrics')
| where TimeGenerated > ago(30d)
| summarize sum(SampleCount) by Computer
Combining with Live Data
// Get historical from view, recent from table
let historical = materialized_view('HourlyMetrics')
| where TimeGenerated between (ago(30d) .. ago(1h));
let recent = PerfMetrics
| where TimeGenerated > ago(1h)
| summarize AvgValue = avg(CounterValue) by bin(TimeGenerated, 1h), Computer, CounterName;
union historical, recent
| summarize avg(AvgValue) by bin(TimeGenerated, 1d), Computer
Managing Materialized Views
Check View Status
// Show all materialized views
.show materialized-views
// Show specific view details
.show materialized-view HourlyMetrics
// Check materialization progress
.show materialized-view HourlyMetrics extents
View Statistics
// Get materialization statistics
.show materialized-view HourlyMetrics statistics
// Check for lag
.show materialized-view HourlyMetrics statistics
| project MaterializationLag = LastRun - now()
Alter Views
// Disable view
.disable materialized-view HourlyMetrics
// Enable view
.enable materialized-view HourlyMetrics
// Drop view
.drop materialized-view HourlyMetrics
Backfill Strategies
Full Backfill
// Create with full backfill (processes all existing data)
.create materialized-view with (backfill=true) HourlyMetrics on table PerfMetrics
{
PerfMetrics
| summarize avg(CounterValue) by bin(TimeGenerated, 1h), Computer
}
Partial Backfill
// Backfill from specific date
.create materialized-view with (effectiveDateTime=datetime(2021-09-01)) HourlyMetrics on table PerfMetrics
{
PerfMetrics
| summarize avg(CounterValue) by bin(TimeGenerated, 1h), Computer
}
Async Backfill
// Create view and backfill asynchronously
.create async materialized-view with (backfill=true) LargeViewBackfill on table MassiveTable
{
MassiveTable
| summarize count() by bin(TimeGenerated, 1h), Category
}
Policies and Configuration
Retention Policy
// Set retention for materialized view
.alter materialized-view HourlyMetrics policy retention ```
{
"SoftDeletePeriod": "365.00:00:00",
"Recoverability": "Enabled"
}```
Caching Policy
// Set hot cache for fast queries
.alter materialized-view HourlyMetrics policy caching hot = 90d
Performance Considerations
Query Performance Comparison
// Without materialized view (slow on large data)
PerfMetrics
| where TimeGenerated > ago(30d)
| summarize avg(CounterValue) by bin(TimeGenerated, 1h), Computer
| count
// With materialized view (fast)
HourlyMetrics
| where TimeGenerated > ago(30d)
| count
Monitoring View Health
// Create alert for materialization lag
.show materialized-view HourlyMetrics statistics
| extend LagMinutes = datetime_diff('minute', now(), LastRun)
| where LagMinutes > 30
Real-World Examples
Dashboard Summary View
.create materialized-view DashboardSummary on table ContainerLogs
{
ContainerLogs
| summarize
TotalLogs = count(),
ErrorLogs = countif(LogEntry contains "error"),
WarningLogs = countif(LogEntry contains "warning")
by bin(TimeGenerated, 5m), Namespace, PodName
}
SLA Metrics View
.create materialized-view SLAMetrics on table RequestLogs
{
RequestLogs
| summarize
TotalRequests = count(),
SuccessfulRequests = countif(StatusCode < 500),
P50Latency = percentile(DurationMs, 50),
P99Latency = percentile(DurationMs, 99)
by bin(TimeGenerated, 1h), ServiceName, Endpoint
}
Best Practices
- Start with common queries - Materialize patterns you run frequently
- Choose appropriate granularity - Match aggregation to query needs
- Monitor lag - Ensure views stay current
- Set retention - Don’t keep materialized data longer than needed
- Test query patterns - Verify views improve performance
- Document views - Keep track of what each view provides
Conclusion
Materialized views are a powerful optimization technique for Azure Data Explorer. By pre-computing aggregations, you can dramatically improve query performance and reduce costs for common monitoring scenarios.
Tomorrow, we’ll explore external tables in ADX for querying data stored outside the cluster.