Deploying Machine Learning Models with Azure ML
After training and registering a model, the next step is deployment. Azure ML offers several ways to serve models, each suited to different latency, scale, and cost requirements.
Deployment Options Overview
| Option | Use Case | Latency | Scale |
|---|---|---|---|
| Managed Online Endpoints | Real-time inference | Low | Auto-scaling |
| Kubernetes Endpoints | Enterprise/hybrid | Low | Custom |
| Batch Endpoints | Large-scale batch | N/A | High throughput |
| Azure Container Instances | Dev/test | Medium | Manual |
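All of the Python snippets below assume an authenticated MLClient handle named ml_client. A minimal setup looks like the following sketch; the subscription, resource group, and workspace values are placeholders you would replace with your own:

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# DefaultAzureCredential works for local development (az login),
# managed identity, and service principals.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription-id>",      # placeholder
    resource_group_name="<resource-group>",   # placeholder
    workspace_name="<workspace-name>",        # placeholder
)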
Creating a Scoring Script
# score.py
import os
import json
import logging

import joblib
import numpy as np


def init():
    """
    Initialize the model when the container starts.
    Called once when the deployment is created.
    """
    global model
    logging.info("Initializing model...")
    # AZUREML_MODEL_DIR is set by Azure ML
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')
    model = joblib.load(model_path)
    logging.info("Model loaded successfully")


def run(raw_data):
    """
    Process inference requests.
    Called for each prediction request.
    """
    try:
        # Parse input data
        data = json.loads(raw_data)

        # Handle both single and batch predictions
        if isinstance(data, dict):
            input_data = np.array([list(data.values())])
        else:
            input_data = np.array(data)

        # Make predictions
        predictions = model.predict(input_data)
        probabilities = model.predict_proba(input_data)

        # Return results
        return {
            "predictions": predictions.tolist(),
            "probabilities": probabilities.tolist()
        }
    except Exception as e:
        logging.error(f"Error during inference: {str(e)}")
        return {"error": str(e)}
MLflow Model Scoring (No Custom Script)
# For MLflow models, Azure ML can auto-generate the scoring script
# Just deploy the model directly
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment

# Endpoint handles routing
endpoint = ManagedOnlineEndpoint(
    name="churn-predictor-endpoint",
    description="Customer churn prediction service",
    auth_mode="key"
)
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Deployment handles the model serving
deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="churn-predictor-endpoint",
    model="azureml:customer-churn-predictor:2",
    instance_type="Standard_DS3_v2",
    instance_count=1
)
ml_client.online_deployments.begin_create_or_update(deployment).result()
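Azure ML's auto-generated scoring layer for MLflow models expects a JSON body wrapped in an input_data key. A sketch of what a request payload might look like, assuming the churn model was trained on a tabular feature set (column names here are illustrative, not from the registered model):

import json

# "input_data" wrapper expected by Azure ML's MLflow scoring server;
# column names are illustrative assumptions.
payload = {
    "input_data": {
        "columns": ["tenure_months", "monthly_charges", "support_tickets"],
        "data": [[24, 79.5, 2]]
    }
}
body = json.dumps(payload)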
Custom Environment for Deployment
from azure.ai.ml.entities import Environment, BuildContext

# Conda specification for the inference environment
conda_env = """
name: inference-env
channels:
  - conda-forge
dependencies:
  - python=3.9
  - pip
  - pip:
    - scikit-learn==1.0.0
    - joblib==1.1.0
    - numpy==1.21.0
    - inference-schema
"""

# Write the specification to disk so Environment can reference it
with open("conda.yml", "w") as f:
    f.write(conda_env)

# Create the environment from the conda file and a base image
env = Environment(
    name="sklearn-inference-env",
    description="Environment for sklearn model inference",
    conda_file="conda.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest"
)
ml_client.environments.create_or_update(env)
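BuildContext (imported above) covers the case where a conda file is not enough: Azure ML can build the environment image from a local Docker build context. A minimal sketch, assuming a ./docker-context directory (hypothetical path) containing a Dockerfile:

# Build the environment image from a local Docker context
docker_env = Environment(
    name="sklearn-inference-docker-env",
    description="Inference environment built from a Dockerfile",
    build=BuildContext(path="./docker-context")
)
ml_client.environments.create_or_update(docker_env)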
Deployment with Custom Code
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    CodeConfiguration,
    OnlineRequestSettings,
    ProbeSettings
)

# Create endpoint
endpoint = ManagedOnlineEndpoint(
    name="fraud-detector-endpoint",
    description="Real-time fraud detection",
    auth_mode="key",
    tags={"team": "risk-analytics"}
)
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Create deployment with custom scoring script
deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="fraud-detector-endpoint",
    model="azureml:fraud-detector:3",
    code_configuration=CodeConfiguration(
        code="./src/scoring",
        scoring_script="score.py"
    ),
    environment="azureml:sklearn-inference-env:1",
    instance_type="Standard_DS3_v2",
    instance_count=2,
    request_settings=OnlineRequestSettings(
        request_timeout_ms=5000,
        max_concurrent_requests_per_instance=100
    ),
    liveness_probe=ProbeSettings(
        initial_delay=30,
        period=10,
        failure_threshold=3
    ),
    readiness_probe=ProbeSettings(
        initial_delay=10,
        period=10,
        failure_threshold=3
    )
)
ml_client.online_deployments.begin_create_or_update(deployment).result()

# Route all traffic to the new deployment
endpoint.traffic = {"blue": 100}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()
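It is worth confirming the deployment is healthy before relying on it. You can check its provisioning state and send a test request directly to a named deployment, bypassing the traffic rules:

# Inspect the deployment's provisioning state
deployment = ml_client.online_deployments.get(
    name="blue",
    endpoint_name="fraud-detector-endpoint"
)
print(deployment.provisioning_state)  # e.g. "Succeeded"

# Invoke the specific deployment regardless of traffic split
# (test-request.json is a sample payload; see the testing sections below)
result = ml_client.online_endpoints.invoke(
    endpoint_name="fraud-detector-endpoint",
    deployment_name="blue",
    request_file="./test-request.json"
)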
Testing Deployed Models
import json
import urllib.request

# Get endpoint details
endpoint = ml_client.online_endpoints.get("fraud-detector-endpoint")
scoring_uri = endpoint.scoring_uri
api_key = ml_client.online_endpoints.get_keys("fraud-detector-endpoint").primary_key

# Prepare request
data = {
    "transaction_amount": 150.00,
    "merchant_category": "online_retail",
    "user_history_score": 0.85,
    "time_since_last_transaction": 120
}
body = json.dumps(data).encode('utf-8')
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {api_key}'
}

req = urllib.request.Request(scoring_uri, body, headers)
with urllib.request.urlopen(req) as response:
    result = json.loads(response.read())
    print(f"Prediction: {result}")
Using the Azure ML SDK for Inference
# Simpler approach using the SDK
result = ml_client.online_endpoints.invoke(
    endpoint_name="fraud-detector-endpoint",
    request_file="./test-request.json"
)
print(result)
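The referenced test-request.json simply holds the request body. Given the scoring script above, a sketch of generating it (same illustrative payload as the urllib example):

import json

# Write a sample request file matching the scoring script's input format
sample = {
    "transaction_amount": 150.00,
    "merchant_category": "online_retail",
    "user_history_score": 0.85,
    "time_since_last_transaction": 120
}
with open("test-request.json", "w") as f:
    json.dump(sample, f)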
Blue-Green Deployment Strategy
# Create new deployment (green) alongside existing (blue)
green_deployment = ManagedOnlineDeployment(
    name="green",
    endpoint_name="fraud-detector-endpoint",
    model="azureml:fraud-detector:4",  # New model version
    code_configuration=CodeConfiguration(
        code="./src/scoring",
        scoring_script="score.py"
    ),
    environment="azureml:sklearn-inference-env:1",
    instance_type="Standard_DS3_v2",
    instance_count=2
)
ml_client.online_deployments.begin_create_or_update(green_deployment).result()

# Gradually shift traffic
endpoint = ml_client.online_endpoints.get("fraud-detector-endpoint")

# 10% to green for testing
endpoint.traffic = {"blue": 90, "green": 10}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# After validation, shift more traffic
endpoint.traffic = {"blue": 50, "green": 50}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Complete migration
endpoint.traffic = {"blue": 0, "green": 100}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Delete old deployment
ml_client.online_deployments.begin_delete(
    name="blue",
    endpoint_name="fraud-detector-endpoint"
).result()
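If green misbehaves at any point during the rollout, rolling back is just another traffic update (assuming blue has not yet been deleted):

# Roll back: send all traffic to the old deployment
endpoint.traffic = {"blue": 100, "green": 0}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()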
Monitoring Deployments
# Get deployment logs
logs = ml_client.online_deployments.get_logs(
    name="green",
    endpoint_name="fraud-detector-endpoint",
    lines=100
)
print(logs)

# Deployment metrics are available in Azure Monitor
# Query via Azure CLI or portal
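For programmatic access to those metrics, the azure-monitor-query package can query the endpoint's Azure resource directly. A sketch, assuming the package is installed and you know the endpoint's ARM resource ID; the metric name below is an assumption for illustration, so check the portal for the names your endpoint actually emits:

from datetime import timedelta
from azure.identity import DefaultAzureCredential
from azure.monitor.query import MetricsQueryClient

client = MetricsQueryClient(DefaultAzureCredential())

# ARM resource ID of the online endpoint (placeholder values)
resource_id = (
    "/subscriptions/<subscription-id>/resourceGroups/<resource-group>"
    "/providers/Microsoft.MachineLearningServices/workspaces/<workspace>"
    "/onlineEndpoints/fraud-detector-endpoint"
)

# "RequestLatency" is an assumed metric name, not confirmed by the post
response = client.query_resource(
    resource_id,
    metric_names=["RequestLatency"],
    timespan=timedelta(hours=1)
)
for metric in response.metrics:
    print(metric.name)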
Best Practices
- Use MLflow models: Simplifies deployment with auto-generated scoring
- Implement health probes: Ensure reliable service discovery
- Set request timeouts: Protect against slow requests
- Use blue-green deployments: Zero-downtime updates
- Monitor inference latency: Set up alerts for performance degradation
- Scale appropriately: Use auto-scaling rules based on traffic
Model deployment is where your ML work delivers business value. Azure ML’s managed endpoints make it straightforward to deploy, scale, and update models in production.