August 1, 2022 1 min read

Azure Machine Learning Updates and New Features

Tags: Azure Machine Learning, Azure ML, AI, MLOps

Azure Machine Learning continues to evolve with new features that streamline the ML lifecycle. Let’s explore the latest updates and how they improve your machine learning workflows.

What’s New in Azure ML

The latest updates focus on:

- Improved MLOps capabilities
- Enhanced responsible AI tools
- Simplified deployment options
- Better integration with open-source tools

Getting Started with Azure ML SDK v2

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build the credential first, then bind a client to the target workspace
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="your-subscription-id",
    resource_group_name="your-resource-group",
    workspace_name="your-workspace",
)

# Print one summary line (name, type, provisioning state) per listed compute
for target in ml_client.compute.list():
    summary = f"{target.name}: {target.type} - {target.provisioning_state}"
    print(summary)

Creating Environments

from azure.ai.ml.entities import Environment

# Describe the custom environment: a base Docker image plus a conda spec file
env_settings = {
    "name": "sklearn-training-env",
    "description": "Environment for scikit-learn training",
    "image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    "conda_file": "./conda.yaml",
}
custom_env = Environment(**env_settings)

# Create or update the environment in the workspace
ml_client.environments.create_or_update(custom_env)

conda.yaml:

# Conda specification referenced by the environment's conda_file ("./conda.yaml")
name: sklearn-training
channels:
  - conda-forge
dependencies:
  - python=3.9
  - pip
  # Everything under "pip:" is installed with pip; all versions are pinned exactly
  - pip:
    - scikit-learn==1.1.1
    - pandas==1.4.3
    - numpy==1.23.0
    - mlflow==1.27.0
    - azureml-mlflow==1.43.0

Registering Data Assets

from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Single-file data asset pointing at the churn CSV in the workspace blob store
training_data = Data(
    type=AssetTypes.URI_FILE,
    path="azureml://datastores/workspaceblobstore/paths/data/churn.csv",
    name="customer-churn-data",
    description="Customer churn training dataset",
)

# Folder data asset pointing at the product image directory
image_data = Data(
    type=AssetTypes.URI_FOLDER,
    path="azureml://datastores/workspaceblobstore/paths/images/",
    name="product-images",
    description="Product image dataset",
)

# Register both assets, file first and folder second
for asset in (training_data, image_data):
    ml_client.data.create_or_update(asset)

Training Jobs with SDK v2

from azure.ai.ml import command, Input

# Inputs are referenced from the command line as ${{inputs.<name>}}
job_inputs = {
    "training_data": Input(
        path="azureml://datastores/workspaceblobstore/paths/data/train.csv",
        type="uri_file",
    ),
    "learning_rate": 0.01,
}

# Command job that runs train.py from ./src on the "cpu-cluster" compute
training_job = command(
    display_name="churn-training",
    experiment_name="customer-churn",
    description="Training job for customer churn prediction",
    code="./src",
    command="python train.py --data ${{inputs.training_data}} --learning-rate ${{inputs.learning_rate}}",
    inputs=job_inputs,
    environment="sklearn-training-env@latest",
    compute="cpu-cluster",
)

# Submit the job and report the name assigned to it
returned_job = ml_client.jobs.create_or_update(training_job)
print(f"Job submitted: {returned_job.name}")

# Follow the job's log stream
ml_client.jobs.stream(returned_job.name)

Model Registration

from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Register the MLflow model produced by the training job.
# Files a job logs through MLflow are stored under its default "artifacts"
# output, so the URI has the form
#   azureml://jobs/<job-name>/outputs/artifacts/paths/<artifact-path>/
# (the training job declares no named output called "model").
# NOTE: assumes train.py logged the model under the artifact path "model",
# e.g. mlflow.sklearn.log_model(model, "model") — adjust if it differs.
model = Model(
    name="churn-predictor",
    description="Customer churn prediction model",
    path=f"azureml://jobs/{returned_job.name}/outputs/artifacts/paths/model/",
    type=AssetTypes.MLFLOW_MODEL,
)

ml_client.models.create_or_update(model)

# List every registered version of the model with its description
for m in ml_client.models.list(name="churn-predictor"):
    print(f"Version {m.version}: {m.description}")

Managed Online Endpoints

from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment

# Create the endpoint (key-based auth) and block until provisioning finishes
endpoint = ManagedOnlineEndpoint(
    name="churn-endpoint",
    description="Endpoint for churn prediction",
    auth_mode="key"
)

ml_client.online_endpoints.begin_create_or_update(endpoint).result()

# Create the "blue" deployment behind the endpoint and wait for it to finish
deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="churn-endpoint",
    model="azureml:churn-predictor:1",
    instance_type="Standard_DS2_v2",
    instance_count=1
)

ml_client.online_deployments.begin_create_or_update(deployment).result()

# Test the endpoint.
# No traffic has been allocated to "blue" in this snippet, so the request is
# sent to that deployment explicitly instead of relying on the endpoint's
# traffic rules. To serve regular traffic, set endpoint.traffic = {"blue": 100}
# and update the endpoint.
result = ml_client.online_endpoints.invoke(
    endpoint_name="churn-endpoint",
    deployment_name="blue",
    request_file="./sample-request.json"
)
print(result)

Enhanced Experiment Tracking

import mlflow

# Select the experiment that the run below is recorded under
mlflow.set_experiment("customer-churn")

# NOTE(review): train_model, X_train, y_train, X_test and y_test are not
# defined in this snippet — presumably supplied by the surrounding training
# script; confirm before running.
with mlflow.start_run():
    # Log parameters
    mlflow.log_param("algorithm", "RandomForest")
    mlflow.log_param("n_estimators", 100)

    # Train model
    model = train_model(X_train, y_train)

    # Log metrics
    accuracy = model.score(X_test, y_test)
    mlflow.log_metric("accuracy", accuracy)
    # NOTE(review): f1 is never assigned in this snippet; it must be computed
    # beforehand (e.g. from predictions on X_test) or this line raises NameError.
    mlflow.log_metric("f1_score", f1)

    # Log artifacts
    # NOTE(review): confusion_matrix.png is not created here — presumably it
    # must already exist in the working directory.
    mlflow.log_artifact("confusion_matrix.png")

    # Log model
    mlflow.sklearn.log_model(model, "model")

Azure Machine Learning SDK v2 provides a cleaner, more intuitive API for the complete ML lifecycle.