Back to Blog
4 min read

Azure Machine Learning Updates from Microsoft Ignite 2022

Microsoft Ignite 2022 brought significant updates to Azure Machine Learning, focusing on responsible AI, MLOps improvements, and new model capabilities. Let’s explore what’s new and how these features improve ML workflows.

Managed Feature Store (Preview)

Feature stores are crucial for ML operations. Azure ML now includes a managed feature store:

from azure.ai.ml import MLClient
from azure.ai.ml.entities import FeatureStore, FeatureSet, Feature
from azure.identity import DefaultAzureCredential

# Authenticate and bind a client to the target workspace.
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="your-subscription",
    resource_group="your-rg",
    workspace_name="your-workspace",
)

# Provision the managed feature store; begin_* returns a long-running
# operation, so .result() blocks until provisioning completes.
feature_store = FeatureStore(
    name="customer-features",
    description="Customer feature store for recommendation models",
)
ml_client.feature_stores.begin_create_or_update(feature_store).result()

# Feature schema as (name, type) pairs, expanded into Feature objects below.
_FEATURE_SCHEMA = [
    ("total_purchases", "float"),
    ("avg_order_value", "float"),
    ("days_since_last_purchase", "int"),
    ("loyalty_tier", "string"),
    ("preferred_category", "string"),
]

# Define a feature set keyed on the customer_id entity, backed by parquet data.
customer_features = FeatureSet(
    name="customer_profile",
    version="1",
    entities=["customer_id"],
    features=[Feature(name=fname, type=ftype) for fname, ftype in _FEATURE_SCHEMA],
    source_path="azureml://datastores/customerdata/paths/features/customer_profile.parquet",
)
ml_client.feature_sets.begin_create_or_update(customer_features).result()

Using Features in Training

# FIX: `Output` is used below but was missing from this import, which would
# raise NameError when the snippet runs.
from azure.ai.ml import command, Input, Output
from azure.ai.ml.entities import Environment

# Submit a command job whose input is a feature set materialized from the
# managed feature store; ${{...}} placeholders are resolved by Azure ML.
training_job = command(
    code="./src",
    command="python train.py --features ${{inputs.features}} --output ${{outputs.model}}",
    inputs={
        # Feature set addressed by its feature-store URI (store/set/version).
        "features": Input(
            type="uri_folder",
            path="azureml://featurestores/customer-features/featuresets/customer_profile/versions/1"
        )
    },
    outputs={
        # Trained model is tracked as an MLflow model asset.
        "model": Output(type="mlflow_model")
    },
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1",
    compute="cpu-cluster"
)

ml_client.jobs.create_or_update(training_job)

Responsible AI Dashboard Improvements

The Responsible AI dashboard now includes more capabilities:

from azure.ai.ml.entities import ResponsibleAISettings

# Configure responsible AI analysis: enable every dashboard component.
# NOTE(review): rai_settings is constructed but never passed to a job or model
# in this snippet — presumably attached elsewhere; confirm intended usage.
rai_settings = ResponsibleAISettings(
    error_analysis=True,
    counterfactual=True,
    causal_analysis=True,
    data_explorer=True,
    model_overview=True,
    feature_importances=True
)

# Create RAI insights
from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights

# Initialize RAI insights.
# NOTE(review): `model`, `train_data`, `test_data`, and `target_column` are
# not defined in this snippet — they must exist in the surrounding session.
rai_insights = RAIInsights(model, train_data, test_data, target_column, task_type='classification')

# Add components (each .add() registers an analysis to run in compute()).
rai_insights.explainer.add()
rai_insights.error_analysis.add()
# 10 counterfactual examples per point, targeting the opposite predicted class.
rai_insights.counterfactual.add(total_CFs=10, desired_class='opposite')
# Causal analysis treating feature1/feature2 as interventions.
rai_insights.causal.add(treatment_features=['feature1', 'feature2'])

# Compute all registered insights (can be expensive on large datasets).
rai_insights.compute()

# Render the interactive dashboard from the computed insights.
ResponsibleAIDashboard(rai_insights)

Model Registry Enhancements

from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Register the trained model with descriptive metadata.
# Tags and properties are split out here so they read as a small catalog.
churn_model_tags = {
    "task": "classification",
    "framework": "scikit-learn",
    "accuracy": "0.92",
}
churn_model_properties = {
    "training_data": "customer_data_v2",
    "feature_count": "25",
    "training_samples": "50000",
    "responsible_ai_reviewed": "true",
}

model = Model(
    path="./outputs/model",
    name="customer-churn-predictor",
    version="1",
    type=AssetTypes.MLFLOW_MODEL,
    description="Customer churn prediction model",
    tags=churn_model_tags,
    properties=churn_model_properties,
)

registered_model = ml_client.models.create_or_update(model)

Pipeline Improvements

Component-Based Pipelines

from azure.ai.ml import dsl, Input, Output
from azure.ai.ml.entities import Component

# Define reusable components.
# Each component is a self-contained "command" step with typed inputs/outputs;
# Azure ML substitutes the ${{inputs.*}} / ${{outputs.*}} placeholders at runtime.

# Step 1: filter/prepare raw data by date, emitting a processed folder.
data_prep_component = Component(
    name="data_preparation",
    version="1",
    type="command",
    inputs={
        "raw_data": Input(type="uri_folder"),
        "date_filter": Input(type="string")
    },
    outputs={
        "processed_data": Output(type="uri_folder")
    },
    code="./components/data_prep",
    command="python prepare.py --input ${{inputs.raw_data}} --date ${{inputs.date_filter}} --output ${{outputs.processed_data}}",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1"
)

# Step 2: train a model on the prepared data, emitting an MLflow model.
training_component = Component(
    name="model_training",
    version="1",
    type="command",
    inputs={
        "training_data": Input(type="uri_folder"),
        "learning_rate": Input(type="number"),
        "epochs": Input(type="integer")
    },
    outputs={
        "model": Output(type="mlflow_model")
    },
    code="./components/training",
    command="python train.py --data ${{inputs.training_data}} --lr ${{inputs.learning_rate}} --epochs ${{inputs.epochs}} --output ${{outputs.model}}",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1"
)

# Register components so they are versioned and reusable across pipelines.
ml_client.components.create_or_update(data_prep_component)
ml_client.components.create_or_update(training_component)

# Build pipeline
@dsl.pipeline(
    description="End-to-end ML pipeline",
    compute="cpu-cluster"
)
def ml_pipeline(raw_data: Input, date_filter: str, learning_rate: float, epochs: int):
    """Two-stage pipeline: prepare raw data, then train a model on the result."""
    prepared = data_prep_component(raw_data=raw_data, date_filter=date_filter)

    trained = training_component(
        training_data=prepared.outputs.processed_data,
        learning_rate=learning_rate,
        epochs=epochs,
    )

    # Expose the trained model as the pipeline-level output.
    return {"model": trained.outputs.model}

# Create and submit pipeline.
# Calling the decorated function with concrete arguments produces a pipeline
# job object; it is not executed until submitted below.
pipeline_job = ml_pipeline(
    raw_data=Input(type="uri_folder", path="azureml://datastores/data/paths/raw"),
    date_filter="2022-01-01",
    learning_rate=0.01,
    epochs=100
)

ml_client.jobs.create_or_update(pipeline_job)

Managed Endpoints Improvements

# NOTE(review): Model, Environment, and CodeConfiguration are imported but not
# referenced in this snippet — confirm whether they are needed.
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration
)

# Create endpoint with enhanced monitoring; key-based auth for callers.
endpoint = ManagedOnlineEndpoint(
    name="churn-prediction-endpoint",
    description="Customer churn prediction service",
    auth_mode="key",
    tags={"team": "data-science", "project": "customer-retention"}
)

ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Deploy with auto-scaling and monitoring.
# `model` references the asset registered earlier as `registered_model`.
deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="churn-prediction-endpoint",
    model=registered_model,
    instance_type="Standard_DS3_v2",
    instance_count=2,
    app_insights_enabled=True,  # stream request metrics to App Insights
    scale_settings={
        "scale_type": "default",
        "min_instances": 2,
        "max_instances": 10
    },
    request_settings={
        "request_timeout_ms": 90000,  # 90-second per-request timeout
        "max_concurrent_requests_per_instance": 100
    },
    # Container liveness probe; period/timeout presumably in seconds — confirm
    # against the Azure ML probe settings documentation.
    liveness_probe={
        "initial_delay": 10,
        "period": 10,
        "timeout": 2,
        "success_threshold": 1,
        "failure_threshold": 30
    }
)

ml_client.online_deployments.begin_create_or_update(deployment).result()

AutoML Improvements

from azure.ai.ml import automl
from azure.ai.ml.constants import AssetTypes

# Classification with new features.
# NOTE(review): `Input` must already be in scope here (imported in an earlier
# snippet via `from azure.ai.ml import command, Input`).
classification_job = automl.classification(
    compute="gpu-cluster",
    experiment_name="customer-classification",
    training_data=Input(type=AssetTypes.MLTABLE, path="./data/train"),
    target_column_name="churn",
    primary_metric="AUC_weighted",
    enable_model_explainability=True,
    enable_stack_ensemble=True,
    enable_vote_ensemble=True,
    n_cross_validations=5,
    validation_data_size=0.2,  # hold out 20% for validation
    # New in Ignite 2022: restrict the AutoML search space to these algorithms.
    allowed_training_algorithms=[
        "LogisticRegression",
        "LightGBM",
        "XGBoostClassifier",
        "RandomForest"
    ],
    # Auto featurization, with explicit type overrides for ambiguous columns.
    featurization={
        "mode": "auto",
        "blocked_transformers": [],
        "column_name_and_types": {
            "customer_id": "Categorical",
            "signup_date": "DateTime"
        }
    },
    # Budget: 2h overall, 20 min per trial, at most 50 trials, 4 in parallel.
    limits={
        "timeout_minutes": 120,
        "trial_timeout_minutes": 20,
        "max_trials": 50,
        "max_concurrent_trials": 4
    }
)

returned_job = ml_client.jobs.create_or_update(classification_job)

Data Labeling Improvements

from azure.ai.ml.entities import LabelingJob, LabelCategory

# Create labeling project for single-label image classification.
labeling_job = LabelingJob(
    name="image-classification-labeling",
    description="Label product images for classification",
    labeling_type="image_classification",
    # Folder of images to label, addressed through a registered datastore.
    data=Input(type="uri_folder", path="azureml://datastores/images/paths/products"),
    # Closed set of label choices shown to annotators.
    label_categories=[
        LabelCategory(name="electronics", display_name="Electronics"),
        LabelCategory(name="clothing", display_name="Clothing"),
        LabelCategory(name="furniture", display_name="Furniture"),
        LabelCategory(name="other", display_name="Other")
    ],
    instructions="Classify each product image into the appropriate category.",
    ml_assist_enabled=True,  # Use ML to suggest labels
    tags={"project": "product-catalog"}
)

ml_client.jobs.create_or_update(labeling_job)

Integration with Azure Synapse

# Use Synapse Spark for large-scale feature engineering
from azure.ai.ml.entities import SynapseSparkCompute

# Attach an existing Synapse Spark pool as an Azure ML compute target.
synapse_compute = SynapseSparkCompute(
    name="synapse-spark",
    resource_id="/subscriptions/.../resourceGroups/.../providers/Microsoft.Synapse/workspaces/.../bigDataPools/sparkpool"
)

ml_client.compute.begin_create_or_update(synapse_compute).result()

# Run the feature-engineering script on the attached Spark pool.
spark_job = command(
    code="./spark_jobs",
    command="python feature_engineering.py",
    compute="synapse-spark",
    environment="AzureML-PySpark-3.2-py38:1"
)

# FIX: submit the job — the original snippet built spark_job but never
# submitted it, unlike every other example in this post.
ml_client.jobs.create_or_update(spark_job)

Conclusion

Azure Machine Learning continues to evolve with features that address real-world ML challenges. The managed feature store, improved responsible AI capabilities, and enhanced MLOps features make it easier to build, deploy, and maintain ML solutions at scale. These Ignite 2022 updates reinforce Microsoft’s commitment to enterprise-grade machine learning.

Resources

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.