September 22, 2021 1 min read

Video Intelligence with Azure Video Analyzer

Azure Video Analytics Computer Vision AI IoT

Azure Video Analyzer enables you to extract insights from video content using AI. It combines edge computing with cloud intelligence to analyze live and recorded video streams for objects, motion, events, and more.

Video Analyzer Capabilities

Object Detection: Detect and track objects in video
Motion Detection: Identify movement and activity
Line Crossing: Count objects crossing virtual lines
Zone Intrusion: Alert when objects enter defined areas
Face Detection: Detect the presence of faces (detection only, not identity recognition)
Custom Models: Deploy your own ML models

Architecture Overview

+-------------+      +------------------+      +---------------+
|   Camera    | ---> |  IoT Edge        | ---> |   Azure       |
|   Stream    |      |  Video Analyzer  |      |   Cloud       |
+-------------+      +------------------+      +---------------+
                     |  - AI Processing  |      |  - Storage    |
                     |  - Local Insights |      |  - Analytics  |
                     |  - Edge Inference |      |  - Portal     |
                     +------------------+      +---------------+

Setting Up Video Analyzer

from azure.media.videoanalyzer.edge import (
    MediaGraphTopology,
    MediaGraphInstance,
    MediaGraphRtspSource,
    MediaGraphMotionDetectionProcessor,
    MediaGraphSignalGateProcessor,
    MediaGraphIoTHubMessageSink
)

def create_motion_detection_topology():
    """Build the "MotionDetection" media graph topology.

    The graph wires an RTSP source into a motion detection processor and
    sends the processor's output to an IoT Hub message sink.

    Returns:
        The assembled ``MediaGraphTopology``.
    """
    # Topology parameters; only the password declares a default value.
    declared_parameters = [
        {"name": "rtspUrl", "type": "String"},
        {"name": "rtspUserName", "type": "String"},
        {"name": "rtspPassword", "type": "String", "default": ""},
    ]

    # Endpoint description; the ${...} placeholders refer to the
    # parameters declared above.
    camera_endpoint = {
        "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
        "url": "${rtspUrl}",
        "credentials": {
            "@type": "#Microsoft.Media.MediaGraphUsernamePasswordCredentials",
            "username": "${rtspUserName}",
            "password": "${rtspPassword}",
        },
    }

    rtsp_source = MediaGraphRtspSource(
        name="rtspSource",
        endpoint=camera_endpoint,
    )

    motion_detector = MediaGraphMotionDetectionProcessor(
        name="motionDetection",
        inputs=[{"nodeName": "rtspSource"}],
        sensitivity="medium",
        outputMotionRegion=True,
    )

    hub_sink = MediaGraphIoTHubMessageSink(
        name="iotHubSink",
        inputs=[{"nodeName": "motionDetection"}],
        hubOutputName="inferenceOutput",
    )

    return MediaGraphTopology(
        name="MotionDetection",
        description="Detect motion in RTSP stream",
        parameters=declared_parameters,
        sources=[rtsp_source],
        processors=[motion_detector],
        sinks=[hub_sink],
    )

Object Detection Pipeline

def create_object_detection_topology():
    """Build the "ObjectDetection" media graph topology.

    Node chain: RTSP source -> frame rate filter (maximumFps 5) -> HTTP
    extension posting JPEG images to ``${inferenceUrl}`` -> IoT Hub message
    sink. The topology's own description names the model as YOLO.

    Returns:
        The assembled ``MediaGraphTopology``; every node is given as a plain
        dict carrying its ``@type`` discriminator.
    """
    declared_parameters = [
        {"name": name, "type": "String"}
        for name in ("rtspUrl", "rtspUserName", "rtspPassword", "inferenceUrl")
    ]

    rtsp_source = {
        "@type": "#Microsoft.Media.MediaGraphRtspSource",
        "name": "rtspSource",
        "endpoint": {
            "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
            "url": "${rtspUrl}",
            "credentials": {
                "@type": "#Microsoft.Media.MediaGraphUsernamePasswordCredentials",
                "username": "${rtspUserName}",
                "password": "${rtspPassword}",
            },
        },
    }

    # Caps the rate of frames handed to the inference extension.
    frame_rate_filter = {
        "@type": "#Microsoft.Media.MediaGraphFrameRateFilterProcessor",
        "name": "frameRateFilter",
        "inputs": [{"nodeName": "rtspSource"}],
        "maximumFps": 5,
    }

    # Image settings for the frames sent to the inference endpoint.
    image_settings = {
        "scale": {"mode": "preserveAspectRatio", "width": "416", "height": "416"},
        "format": {"@type": "#Microsoft.Media.MediaGraphImageFormatJpeg"},
    }
    http_extension = {
        "@type": "#Microsoft.Media.MediaGraphHttpExtension",
        "name": "httpExtension",
        "inputs": [{"nodeName": "frameRateFilter"}],
        "endpoint": {
            "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
            "url": "${inferenceUrl}",
        },
        "image": image_settings,
    }

    hub_sink = {
        "@type": "#Microsoft.Media.MediaGraphIoTHubMessageSink",
        "name": "iotHubSink",
        "inputs": [{"nodeName": "httpExtension"}],
        "hubOutputName": "inferenceOutput",
    }

    return MediaGraphTopology(
        name="ObjectDetection",
        description="Detect objects using YOLO",
        parameters=declared_parameters,
        sources=[rtsp_source],
        processors=[frame_rate_filter, http_extension],
        sinks=[hub_sink],
    )

Processing Video Events

import json
from azure.iot.hub import IoTHubRegistryManager
from azure.eventhub import EventHubConsumerClient

class VideoEventProcessor:
    """Read video analytics messages from an Event Hub and dispatch inferences.

    Each message body is parsed as JSON; every entry of its ``"inferences"``
    list is flattened into a plain dict and handed to a user callback.
    """

    def __init__(self, event_hub_connection: str, event_hub_name: str):
        """Create the consumer client on the ``$Default`` consumer group.

        Args:
            event_hub_connection: Event Hub connection string.
            event_hub_name: Name of the Event Hub to read from.
        """
        self.consumer = EventHubConsumerClient.from_connection_string(
            event_hub_connection,
            consumer_group="$Default",
            eventhub_name=event_hub_name
        )

    @staticmethod
    def _build_event_data(inference, timestamp):
        """Flatten one inference dict into the event passed to the callback.

        Args:
            inference: A single entry of the message's ``"inferences"`` list.
            timestamp: Value stored under ``"timestamp"`` (the event's
                enqueued time).

        Returns:
            Dict with ``type``, ``subtype``, ``entity``, ``event`` and
            ``timestamp`` keys. A ``detection`` key (``tag``, ``confidence``,
            ``box``) is added only for ``"entity"`` inferences that actually
            carry an entity object.
        """
        event_data = {
            "type": inference.get("type"),
            "subtype": inference.get("subtype"),
            "entity": inference.get("entity"),
            "event": inference.get("event"),
            "timestamp": timestamp,
        }

        entity = event_data["entity"]
        # Missing "type"/"entity" keys must not raise: one malformed
        # inference should not abort handling of the rest of the message.
        if event_data["type"] == "entity" and isinstance(entity, dict):
            tag = entity.get("tag")
            if not isinstance(tag, dict):
                # Covers an absent tag as well as an explicit JSON null.
                tag = {}
            event_data["detection"] = {
                "tag": tag.get("value"),
                "confidence": tag.get("confidence"),
                "box": entity.get("box"),
            }

        return event_data

    def process_events(self, callback):
        """Process incoming video analytics events.

        Runs the consumer's receive loop inside the client's context manager.
        For every received event the JSON body is decoded, ``callback`` is
        invoked once per inference, and the partition checkpoint is updated.

        Args:
            callback: Callable taking the dict built for one inference.

        Raises:
            json.JSONDecodeError: Propagated from the event handler when a
                body is not valid JSON (how the client surfaces handler
                errors is up to the Event Hubs SDK).
        """

        def on_event(partition_context, event):
            body = json.loads(event.body_as_str())

            # Only object-shaped bodies can carry an "inferences" list;
            # other JSON payloads (arrays, scalars) are skipped.
            inferences = body.get("inferences") if isinstance(body, dict) else None

            for inference in inferences or []:
                if not isinstance(inference, dict):
                    continue
                callback(self._build_event_data(inference, event.enqueued_time))

            partition_context.update_checkpoint(event)

        with self.consumer:
            self.consumer.receive(
                on_event=on_event,
                # NOTE(review): "-1" should mean "from the start of the
                # stream" in azure-eventhub; confirm against the SDK docs.
                starting_position="-1"
            )

# Process video analytics events
def handle_detection(event):
    """Print the detection carried by *event*, if any.

    Args:
        event: Dict built for one inference. Only its optional
            ``"detection"`` entry (``tag``, ``confidence``, ``box``) is read;
            events without a detection are ignored.
    """
    det = event.get("detection")
    if not det:
        return

    # The confidence can be None (or absent) when the inference's tag had no
    # score; formatting None with ":.2f" raises, so fall back to "n/a".
    try:
        confidence = f"{float(det.get('confidence')):.2f}"
    except (TypeError, ValueError):
        confidence = "n/a"

    print(f"Detected: {det.get('tag')} (confidence: {confidence})")
    print(f"  Location: {det.get('box')}")

# The two strings are placeholders; substitute a real Event Hub connection
# string and hub name before running.
processor = VideoEventProcessor(
    event_hub_connection="your-event-hub-connection",
    event_hub_name="your-event-hub-name",
)
# processor.process_events(handle_detection)

Line Crossing Detection

def create_line_crossing_topology():
    """Build the "LineCrossing" topology as a plain dict.

    Node chain: RTSP source -> spatial analysis extension configured with a
    line-crossing operation -> IoT Hub message sink ("lineCrossingEvents").
    The line itself comes from the ``lineCoordinates`` topology parameter.

    Returns:
        A new dict with ``@apiVersion``, ``name`` and ``properties`` keys.
    """
    rtsp_source = {
        "@type": "#Microsoft.Media.MediaGraphRtspSource",
        "name": "rtspSource",
        "endpoint": {
            "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
            "url": "${rtspUrl}",
        },
    }

    # One monitored line emitting "lineCrossed" events.
    line_crossing_operation = {
        "@type": "#Microsoft.Media.MediaGraphSpatialAnalysisLineCrossingOperation",
        "lines": [
            {
                "line": "${lineCoordinates}",
                "events": [{"type": "lineCrossed"}],
            },
        ],
        "outputFrequency": "1",
    }

    spatial_analysis = {
        "@type": "#Microsoft.Media.MediaGraphCognitiveServicesVisionExtension",
        "name": "spatialAnalysis",
        "inputs": [{"nodeName": "rtspSource"}],
        "endpoint": {
            "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
            "url": "http://spatialanalysis:5000/score",
        },
        "operation": line_crossing_operation,
    }

    hub_sink = {
        "@type": "#Microsoft.Media.MediaGraphIoTHubMessageSink",
        "name": "iotHubSink",
        "inputs": [{"nodeName": "spatialAnalysis"}],
        "hubOutputName": "lineCrossingEvents",
    }

    properties = {
        "parameters": [
            {"name": "rtspUrl", "type": "String"},
            {"name": "lineCoordinates", "type": "String"},
        ],
        "sources": [rtsp_source],
        "processors": [spatial_analysis],
        "sinks": [hub_sink],
    }

    return {
        "@apiVersion": "1.0",
        "name": "LineCrossing",
        "properties": properties,
    }

# Line coordinates format: "x1,y1,x2,y2" (normalized 0-1)
# Example: "0.5,0,0.5,1" is a vertical line in the center

Zone Monitoring

def create_zone_monitoring_topology():
    """Build the "ZoneMonitoring" topology as a plain dict.

    Node chain: RTSP source -> spatial analysis extension configured with a
    person zone-crossing operation -> IoT Hub message sink ("zoneEvents").
    The zone polygon comes from the ``zonePolygon`` topology parameter.

    Returns:
        A new dict with ``@apiVersion``, ``name`` and ``properties`` keys.
    """
    rtsp_source = {
        "@type": "#Microsoft.Media.MediaGraphRtspSource",
        "name": "rtspSource",
        "endpoint": {
            "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
            "url": "${rtspUrl}",
        },
    }

    # Two event kinds are requested for the single zone, each with its own
    # threshold string.
    zone_events = [
        {"type": "zoneCrossed", "threshold": "16"},
        {"type": "zoneOccupancy", "threshold": "8"},
    ]
    zone_operation = {
        "@type": "#Microsoft.Media.MediaGraphSpatialAnalysisPersonZoneCrossingOperation",
        "zones": [{"zone": "${zonePolygon}", "events": zone_events}],
    }

    spatial_analysis = {
        "@type": "#Microsoft.Media.MediaGraphCognitiveServicesVisionExtension",
        "name": "spatialAnalysis",
        "inputs": [{"nodeName": "rtspSource"}],
        "endpoint": {
            "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
            "url": "http://spatialanalysis:5000/score",
        },
        "operation": zone_operation,
    }

    hub_sink = {
        "@type": "#Microsoft.Media.MediaGraphIoTHubMessageSink",
        "name": "iotHubSink",
        "inputs": [{"nodeName": "spatialAnalysis"}],
        "hubOutputName": "zoneEvents",
    }

    properties = {
        "parameters": [
            {"name": "rtspUrl", "type": "String"},
            {"name": "zonePolygon", "type": "String"},
        ],
        "sources": [rtsp_source],
        "processors": [spatial_analysis],
        "sinks": [hub_sink],
    }

    return {
        "@apiVersion": "1.0",
        "name": "ZoneMonitoring",
        "properties": properties,
    }

Recording Video Clips

def create_event_recording_topology():
    """Build the "EventRecording" topology as a plain dict.

    Node chain: RTSP source -> motion detection processor -> signal gate
    (fed by both the motion detector and the RTSP source) -> asset sink
    writing assets named ``motion-${System.DateTime}``.

    The ``rtspUrl`` parameter is declared under ``properties.parameters``
    because the source endpoint references ``${rtspUrl}``.

    Returns:
        A new dict with ``@apiVersion``, ``name`` and ``properties`` keys.
    """
    topology = {
        "@apiVersion": "1.0",
        "name": "EventRecording",
        "properties": {
            # Declares the placeholder used by the source endpoint below, so
            # "${rtspUrl}" no longer refers to an undeclared parameter.
            "parameters": [
                {"name": "rtspUrl", "type": "String"}
            ],
            "sources": [
                {
                    "@type": "#Microsoft.Media.MediaGraphRtspSource",
                    "name": "rtspSource",
                    "endpoint": {
                        "@type": "#Microsoft.Media.MediaGraphUnsecuredEndpoint",
                        "url": "${rtspUrl}"
                    }
                }
            ],
            "processors": [
                {
                    "@type": "#Microsoft.Media.MediaGraphMotionDetectionProcessor",
                    "name": "motionDetection",
                    "inputs": [{"nodeName": "rtspSource"}],
                    "sensitivity": "medium"
                },
                {
                    "@type": "#Microsoft.Media.MediaGraphSignalGateProcessor",
                    "name": "signalGate",
                    "inputs": [
                        {"nodeName": "motionDetection"},
                        {"nodeName": "rtspSource"}
                    ],
                    # ISO 8601 durations. NOTE(review): presumably the
                    # negative offset keeps 5 s of video before the trigger
                    # and each activation lasts 30 s to 1 min; confirm
                    # against the signal gate processor documentation.
                    "activationEvaluationWindow": "PT1S",
                    "activationSignalOffset": "-PT5S",
                    "minimumActivationTime": "PT30S",
                    "maximumActivationTime": "PT1M"
                }
            ],
            "sinks": [
                {
                    "@type": "#Microsoft.Media.MediaGraphAssetSink",
                    "name": "assetSink",
                    "inputs": [{"nodeName": "signalGate"}],
                    "assetNamePattern": "motion-${System.DateTime}",
                    "segmentLength": "PT30S"
                }
            ]
        }
    }

    return topology

Best Practices

Edge Processing: Process locally to reduce bandwidth
Frame Rate: Lower frame rates reduce compute requirements
Resolution: Balance quality with processing speed
Zone Design: Define clear, non-overlapping zones
Event Filtering: Filter events to reduce noise
Storage Management: Implement retention policies

Azure Video Analyzer brings intelligent video analytics to both edge and cloud, enabling sophisticated spatial analysis and event detection for security, retail, and industrial applications.