Video Intelligence with Azure Video Analyzer
Azure Video Analyzer enables you to extract insights from video content using AI. It combines edge computing with cloud intelligence to analyze live and recorded video streams for objects, motion, events, and more.
Video Analyzer Capabilities
- Object Detection: Detect and track objects in video
- Motion Detection: Identify movement and activity
- Line Crossing: Count objects crossing virtual lines
- Zone Intrusion: Alert when objects enter defined areas
- Face Detection: Detect faces (not recognition)
- Custom Models: Deploy your own ML models
Architecture Overview
+-------------+      +------------------+      +---------------+
|   Camera    | ---> |    IoT Edge      | ---> |    Azure      |
|   Stream    |      | Video Analyzer   |      |    Cloud      |
+-------------+      +------------------+      +---------------+
                     | - AI Processing  |      | - Storage     |
                     | - Local Insights |      | - Analytics   |
                     | - Edge Inference |      | - Portal      |
                     +------------------+      +---------------+
Setting Up Video Analyzer
# Models from the azure-media-analytics-edge preview SDK
from azure.media.analyticsedge import (
    MediaGraphTopology,
    MediaGraphTopologyProperties,
    MediaGraphParameterDeclaration,
    MediaGraphRtspSource,
    MediaGraphUnsecuredEndpoint,
    MediaGraphUsernamePasswordCredentials,
    MediaGraphNodeInput,
    MediaGraphMotionDetectionProcessor,
    MediaGraphIoTHubMessageSink
)
def create_motion_detection_topology():
    """Create a topology for motion detection."""
    properties = MediaGraphTopologyProperties(
        description="Detect motion in RTSP stream",
        parameters=[
            MediaGraphParameterDeclaration(name="rtspUrl", type="String"),
            MediaGraphParameterDeclaration(name="rtspUserName", type="String"),
            MediaGraphParameterDeclaration(name="rtspPassword", type="String", default="")
        ],
        sources=[
            MediaGraphRtspSource(
                name="rtspSource",
                endpoint=MediaGraphUnsecuredEndpoint(
                    url="${rtspUrl}",
                    credentials=MediaGraphUsernamePasswordCredentials(
                        username="${rtspUserName}",
                        password="${rtspPassword}"
                    )
                )
            )
        ],
        processors=[
            MediaGraphMotionDetectionProcessor(
                name="motionDetection",
                inputs=[MediaGraphNodeInput(node_name="rtspSource")],
                sensitivity="medium",
                output_motion_region=True
            )
        ],
        sinks=[
            MediaGraphIoTHubMessageSink(
                name="iotHubSink",
                inputs=[MediaGraphNodeInput(node_name="motionDetection")],
                hub_output_name="inferenceOutput"
            )
        ]
    )
    return MediaGraphTopology(name="MotionDetection", properties=properties)
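A topology is only a template: to run it, you register it with the Video Analyzer edge module and create an instance of it through IoT Hub direct methods. Below is a minimal deployment sketch reusing the function above; it assumes the edge module was deployed under the name "lvaEdge", and the connection string, device ID, camera URL, and credentials are placeholders to substitute.
from azure.iot.hub import IoTHubRegistryManager
from azure.iot.hub.models import CloudToDeviceMethod
from azure.media.analyticsedge import (
    MediaGraphInstance,
    MediaGraphInstanceProperties,
    MediaGraphParameterDefinition,
    MediaGraphTopologySetRequest,
    MediaGraphInstanceSetRequest,
    MediaGraphInstanceActivateRequest
)
# Placeholders: substitute your own hub connection, device, and module names
registry = IoTHubRegistryManager("your-iothub-connection-string")
DEVICE_ID = "my-edge-device"
MODULE_ID = "lvaEdge"
def invoke(request):
    """Send one direct-method request to the edge module."""
    method = CloudToDeviceMethod(method_name=request.method_name,
                                 payload=request.serialize())
    return registry.invoke_device_module_method(DEVICE_ID, MODULE_ID, method)
# Register the topology, bind an instance to a real camera, then activate it
invoke(MediaGraphTopologySetRequest(graph=create_motion_detection_topology()))
instance = MediaGraphInstance(
    name="motion-cam-01",
    properties=MediaGraphInstanceProperties(
        topology_name="MotionDetection",
        parameters=[
            MediaGraphParameterDefinition(name="rtspUrl", value="rtsp://camera-01:554/stream"),
            MediaGraphParameterDefinition(name="rtspUserName", value="admin"),
            MediaGraphParameterDefinition(name="rtspPassword", value="admin-password")
        ]
    )
)
invoke(MediaGraphInstanceSetRequest(instance=instance))
invoke(MediaGraphInstanceActivateRequest(name="motion-cam-01"))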
Object Detection Pipeline
def create_object_detection_topology():
    """Create a topology that sends frames to a YOLO inference endpoint."""
    # Plain JSON topology, in the same format the GraphTopologySet direct method accepts
    topology = {
        "@apiVersion": "1.0",
        "name": "ObjectDetection",
        "properties": {
            "description": "Detect objects using YOLO",
            "parameters": [
                {"name": "rtspUrl", "type": "String"},
                {"name": "rtspUserName", "type": "String"},
                {"name": "rtspPassword", "type": "String"},
                {"name": "inferenceUrl", "type": "String"}
            ],
            "sources": [
                {
                    "@type": "#Microsoft.Media.MediaGraphRtspSource",
                    "name": "rtspSource",
                    "endpoint": {
                        "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
                        "url": "${rtspUrl}",
                        "credentials": {
                            "@type": "#Microsoft.Media.MediaGraphUsernamePasswordCredentials",
                            "username": "${rtspUserName}",
                            "password": "${rtspPassword}"
                        }
                    }
                }
            ],
            "processors": [
                {
                    "@type": "#Microsoft.Media.MediaGraphFrameRateFilterProcessor",
                    "name": "frameRateFilter",
                    "inputs": [{"nodeName": "rtspSource"}],
                    "maximumFps": 5
                },
                {
                    "@type": "#Microsoft.Media.MediaGraphHttpExtension",
                    "name": "httpExtension",
                    "inputs": [{"nodeName": "frameRateFilter"}],
                    "endpoint": {
                        "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
                        "url": "${inferenceUrl}"
                    },
                    "image": {
                        "scale": {"mode": "preserveAspectRatio", "width": "416", "height": "416"},
                        "format": {"@type": "#Microsoft.Media.MediaGraphImageFormatJpeg"}
                    }
                }
            ],
            "sinks": [
                {
                    "@type": "#Microsoft.Media.MediaGraphIoTHubMessageSink",
                    "name": "iotHubSink",
                    "inputs": [{"nodeName": "httpExtension"}],
                    "hubOutputName": "inferenceOutput"
                }
            ]
        }
    }
    return topology
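The httpExtension node POSTs each sampled frame as a JPEG to ${inferenceUrl} and expects detections back as JSON. The Flask stub below sketches that contract under stated assumptions: run_yolo is a hypothetical placeholder for your actual model call, and the response follows the inference shape that the event-processing code in the next section parses.
from flask import Flask, request, jsonify
app = Flask(__name__)
def run_yolo(jpeg_bytes):
    """Hypothetical placeholder: run your YOLO model and return
    (label, confidence, (l, t, w, h)) tuples with normalized boxes."""
    return []
@app.route("/score", methods=["POST"])
def score():
    """Receive one JPEG frame and return detections as JSON."""
    jpeg_bytes = request.get_data()  # raw image/jpeg body from httpExtension
    detections = run_yolo(jpeg_bytes)
    return jsonify({
        "inferences": [
            {
                "type": "entity",
                "entity": {
                    "tag": {"value": label, "confidence": confidence},
                    "box": {"l": l, "t": t, "w": w, "h": h}
                }
            }
            for (label, confidence, (l, t, w, h)) in detections
        ]
    })
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8080)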
Processing Video Events
import json
from azure.eventhub import EventHubConsumerClient
class VideoEventProcessor:
def __init__(self, event_hub_connection: str, event_hub_name: str):
self.consumer = EventHubConsumerClient.from_connection_string(
event_hub_connection,
consumer_group="$Default",
eventhub_name=event_hub_name
)
def process_events(self, callback):
"""Process incoming video analytics events."""
def on_event(partition_context, event):
body = json.loads(event.body_as_str())
# Parse inference results
if "inferences" in body:
for inference in body["inferences"]:
event_data = {
"type": inference.get("type"),
"subtype": inference.get("subtype"),
"entity": inference.get("entity"),
"event": inference.get("event"),
"timestamp": event.enqueued_time
}
if inference["type"] == "entity":
entity = inference["entity"]
event_data["detection"] = {
"tag": entity.get("tag", {}).get("value"),
"confidence": entity.get("tag", {}).get("confidence"),
"box": entity.get("box")
}
callback(event_data)
partition_context.update_checkpoint(event)
with self.consumer:
self.consumer.receive(
on_event=on_event,
starting_position="-1"
)
# Process video analytics events
def handle_detection(event):
if event.get("detection"):
det = event["detection"]
print(f"Detected: {det['tag']} (confidence: {det['confidence']:.2f})")
print(f" Location: {det['box']}")
processor = VideoEventProcessor(
"your-event-hub-connection",
"your-event-hub-name"
)
# processor.process_events(handle_detection)
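For reference, a raw message body on the inferenceOutput route follows the inference shape parsed above. An illustrative example (the values are made up):
sample_body = {
    "inferences": [
        {
            "type": "entity",
            "entity": {
                "tag": {"value": "person", "confidence": 0.92},
                "box": {"l": 0.31, "t": 0.42, "w": 0.08, "h": 0.21}
            }
        }
    ]
}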
Line Crossing Detection
def create_line_crossing_topology():
"""Create topology for counting objects crossing a line."""
topology = {
"@apiVersion": "1.0",
"name": "LineCrossing",
"properties": {
"parameters": [
{"name": "rtspUrl", "type": "String"},
{"name": "lineCoordinates", "type": "String"}
],
"sources": [
{
"@type": "#Microsoft.Media.MediaGraphRtspSource",
"name": "rtspSource",
"endpoint": {
"@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
"url": "${rtspUrl}"
}
}
],
"processors": [
{
"@type": "#Microsoft.Media.MediaGraphCognitiveServicesVisionExtension",
"name": "spatialAnalysis",
"inputs": [{"nodeName": "rtspSource"}],
"endpoint": {
"@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
"url": "http://spatialanalysis:5000/score"
},
"operation": {
"@type": "#Microsoft.Media.MediaGraphSpatialAnalysisLineCrossingOperation",
"lines": [
{
"line": "${lineCoordinates}",
"events": [
{"type": "lineCrossed"}
]
}
],
"outputFrequency": "1"
}
}
],
"sinks": [
{
"@type": "#Microsoft.Media.MediaGraphIoTHubMessageSink",
"name": "iotHubSink",
"inputs": [{"nodeName": "spatialAnalysis"}],
"hubOutputName": "lineCrossingEvents"
}
]
}
}
return topology
# Line coordinates format: "x1,y1,x2,y2" (normalized 0-1)
# Example: "0.5,0,0.5,1" is a vertical line in the center
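On the consuming side, the VideoEventProcessor from earlier can tally crossings per line. A sketch under stated assumptions: the event name "lineCrossed" and the lineName property are assumed field names, so adjust them to the spatial analysis output schema you actually receive.
from collections import Counter
crossing_counts = Counter()
def handle_line_crossing(event):
    """Tally crossings per line from the lineCrossingEvents route."""
    evt = event.get("event") or {}
    if evt.get("name") == "lineCrossed":  # assumed event name
        line_name = evt.get("properties", {}).get("lineName", "line0")  # assumed property
        crossing_counts[line_name] += 1
        print(f"{line_name}: {crossing_counts[line_name]} crossings")
# processor.process_events(handle_line_crossing)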
Zone Monitoring
def create_zone_monitoring_topology():
"""Monitor when objects enter/exit defined zones."""
topology = {
"@apiVersion": "1.0",
"name": "ZoneMonitoring",
"properties": {
"parameters": [
{"name": "rtspUrl", "type": "String"},
{"name": "zonePolygon", "type": "String"}
],
"sources": [
{
"@type": "#Microsoft.Media.MediaGraphRtspSource",
"name": "rtspSource",
"endpoint": {
"@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
"url": "${rtspUrl}"
}
}
],
"processors": [
{
"@type": "#Microsoft.Media.MediaGraphCognitiveServicesVisionExtension",
"name": "spatialAnalysis",
"inputs": [{"nodeName": "rtspSource"}],
"endpoint": {
"@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
"url": "http://spatialanalysis:5000/score"
},
"operation": {
"@type": "#Microsoft.Media.MediaGraphSpatialAnalysisPersonZoneCrossingOperation",
"zones": [
{
"zone": "${zonePolygon}",
"events": [
{"type": "zoneCrossed", "threshold": "16"},
{"type": "zoneOccupancy", "threshold": "8"}
]
}
]
}
}
],
"sinks": [
{
"@type": "#Microsoft.Media.MediaGraphIoTHubMessageSink",
"name": "iotHubSink",
"inputs": [{"nodeName": "spatialAnalysis"}],
"hubOutputName": "zoneEvents"
}
]
}
}
return topology
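The zonePolygon parameter is a string of normalized vertices. A small helper keeps that formatting in one place; this sketch assumes the polygon is serialized as a JSON array of [x, y] pairs, so verify the format your spatial analysis version expects.
import json
def zone_polygon(points):
    """Serialize normalized (x, y) vertices into a zonePolygon string,
    e.g. [[0.5, 0.5], [1.0, 0.5], [1.0, 1.0], [0.5, 1.0]]."""
    for x, y in points:
        if not (0 <= x <= 1 and 0 <= y <= 1):
            raise ValueError(f"Vertex ({x}, {y}) is outside the normalized 0-1 range")
    return json.dumps([[x, y] for x, y in points])
# A rectangular zone covering the lower-right quadrant of the frame
polygon = zone_polygon([(0.5, 0.5), (1.0, 0.5), (1.0, 1.0), (0.5, 1.0)])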
Recording Video Clips
def create_event_recording_topology():
"""Record video clips when events occur."""
    topology = {
        "@apiVersion": "1.0",
        "name": "EventRecording",
        "properties": {
            "parameters": [
                {"name": "rtspUrl", "type": "String"}
            ],
            "sources": [
{
"@type": "#Microsoft.Media.MediaGraphRtspSource",
"name": "rtspSource",
"endpoint": {
"@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
"url": "${rtspUrl}"
}
}
],
"processors": [
{
"@type": "#Microsoft.Media.MediaGraphMotionDetectionProcessor",
"name": "motionDetection",
"inputs": [{"nodeName": "rtspSource"}],
"sensitivity": "medium"
},
{
"@type": "#Microsoft.Media.MediaGraphSignalGateProcessor",
"name": "signalGate",
"inputs": [
{"nodeName": "motionDetection"},
{"nodeName": "rtspSource"}
],
"activationEvaluationWindow": "PT1S",
"activationSignalOffset": "-PT5S",
"minimumActivationTime": "PT30S",
"maximumActivationTime": "PT1M"
}
],
"sinks": [
{
"@type": "#Microsoft.Media.MediaGraphAssetSink",
"name": "assetSink",
"inputs": [{"nodeName": "signalGate"}],
"assetNamePattern": "motion-${System.DateTime}",
"segmentLength": "PT30S"
}
]
}
}
return topology
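The gate's ISO-8601 durations define the clip boundaries: the -PT5S activationSignalOffset starts each clip five seconds before the triggering motion, while the activation times bound how long the gate stays open. A simplified model of the resulting window:
from datetime import datetime, timedelta
def clip_window(event_time,
                signal_offset=timedelta(seconds=-5),   # -PT5S
                min_activation=timedelta(seconds=30),  # PT30S
                max_activation=timedelta(minutes=1)):  # PT1M
    """Approximate the recording window the signal gate opens for one event."""
    start = event_time + signal_offset       # include pre-event footage
    earliest_end = start + min_activation    # gate stays open at least this long
    latest_end = start + max_activation      # hard cap even if motion continues
    return start, earliest_end, latest_end
start, earliest, latest = clip_window(datetime(2021, 6, 1, 12, 0, 0))
print(f"Clip starts {start}, ends between {earliest} and {latest}")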
Best Practices
- Edge Processing: Process locally to reduce bandwidth
- Frame Rate: Lower frame rates reduce compute requirements
- Resolution: Balance quality with processing speed
- Zone Design: Define clear, non-overlapping zones
- Event Filtering: Filter events to reduce noise
- Storage Management: Implement retention policies for recorded assets (see the cleanup sketch below)
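For the last point, a periodic cleanup job can prune recorded assets once they age past a retention window. A minimal sketch using the azure-mgmt-media management client; the subscription, resource group, account name, and 30-day window below are placeholders.
from datetime import datetime, timedelta, timezone
from azure.identity import DefaultAzureCredential
from azure.mgmt.media import AzureMediaServices
RETENTION = timedelta(days=30)  # placeholder retention window
client = AzureMediaServices(DefaultAzureCredential(), "your-subscription-id")
cutoff = datetime.now(timezone.utc) - RETENTION
# Delete recorded assets older than the retention window
for asset in client.assets.list("your-resource-group", "your-media-account"):
    if asset.created and asset.created < cutoff:
        client.assets.delete("your-resource-group", "your-media-account", asset.name)
        print(f"Deleted expired asset: {asset.name}")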
Azure Video Analyzer brings intelligent video analytics to both edge and cloud, enabling sophisticated spatial analysis and event detection for security, retail, and industrial applications.