Skip to content
Back to Blog
1 min read

Automated Machine Learning with Azure ML AutoML

I wrote “Automated Machine Learning with Azure ML AutoML” to share practical, production-minded guidance on this topic.

AutoML Classification

from azure.ai.ml import MLClient, Input, automl
from azure.ai.ml.automl import ClassificationPrimaryMetrics
from azure.identity import DefaultAzureCredential

# Resolve credentials first, then bind the client to one workspace.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="your-subscription",
    resource_group_name="your-rg",
    workspace_name="your-workspace",
)

# Configure AutoML classification.
# Only the task and data settings go to the factory; limits and featurization
# are applied through the job's dedicated setters below.
# NOTE(review): SDK v2 AutoML tabular tasks expect MLTable input, i.e.
# Input(type="mltable", path=<folder containing an MLTable file>) — confirm
# that this path resolves to such an asset before running.
classification_job = automl.classification(
    compute="cpu-cluster",
    experiment_name="automl-classification",
    training_data=Input(path="azureml://datastores/workspaceblobstore/paths/train.csv"),
    target_column_name="target",
    primary_metric=ClassificationPrimaryMetrics.ACCURACY,
    n_cross_validations=5,
)

# Limits: overall budget, per-trial budget, trial counts, and early stopping
classification_job.set_limits(
    timeout_minutes=60,
    trial_timeout_minutes=20,
    max_trials=50,
    max_concurrent_trials=4,
    enable_early_termination=True,
)

# Let AutoML pick the featurization steps automatically
classification_job.set_featurization(mode="auto")

# Set allowed models
classification_job.set_training(
    allowed_training_algorithms=[
        "LightGBM",
        "XGBoostClassifier",
        "RandomForest",
        "LogisticRegression",
    ]
)

# Submit job
returned_job = ml_client.jobs.create_or_update(classification_job)
print(f"AutoML job submitted: {returned_job.name}")

AutoML Regression

from azure.ai.ml.automl import RegressionPrimaryMetrics

# Configure AutoML regression.
# The factory receives the task and data settings only; the run budget is
# applied afterwards with set_limits().
regression_job = automl.regression(
    compute="cpu-cluster",
    experiment_name="automl-regression",
    training_data=Input(path="azureml://datastores/workspaceblobstore/paths/housing.csv"),
    target_column_name="price",
    primary_metric=RegressionPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR,
    n_cross_validations=5,
)

# Limits: overall budget, per-trial budget, and trial counts
regression_job.set_limits(
    timeout_minutes=120,
    trial_timeout_minutes=30,
    max_trials=100,
    max_concurrent_trials=4,
)

# Enable deep learning
regression_job.set_training(
    enable_dnn_training=True,
    enable_onnx_compatible_models=True,
)

# Submit job
returned_job = ml_client.jobs.create_or_update(regression_job)

AutoML Forecasting

from azure.ai.ml.automl import ForecastingPrimaryMetrics

# Configure AutoML forecasting.
# Task, data, and forecasting settings go to the factory; the run budget is
# applied afterwards with set_limits().
forecasting_job = automl.forecasting(
    compute="cpu-cluster",
    experiment_name="automl-forecasting",
    training_data=Input(path="azureml://datastores/workspaceblobstore/paths/sales.csv"),
    target_column_name="sales",
    primary_metric=ForecastingPrimaryMetrics.NORMALIZED_ROOT_MEAN_SQUARED_ERROR,
    n_cross_validations=3,

    # Forecasting-specific settings
    forecasting_settings=automl.ForecastingSettings(
        time_column_name="date",
        forecast_horizon=30,
        frequency="D",
        target_lags=[1, 7, 14],
        target_rolling_window_size=7,
        time_series_id_column_names=["store_id", "product_id"],
    ),
)

# Limits: overall budget and number of trials
forecasting_job.set_limits(
    timeout_minutes=180,
    max_trials=50,
)

# Enable specific algorithms for time series
# NOTE(review): TCNForecaster is a deep-learning model and presumably needs
# enable_dnn_training=True to be tried — confirm against the SDK docs.
forecasting_job.set_training(
    allowed_training_algorithms=[
        "Prophet",
        "AutoArima",
        "ExponentialSmoothing",
        "TCNForecaster",
    ]
)

# Submit job
returned_job = ml_client.jobs.create_or_update(forecasting_job)

Featurization Settings

from azure.ai.ml.automl import ColumnTransformer

# Custom featurization
classification_job = automl.classification(
    # ... basic settings
)

# Column-level overrides, one ColumnTransformer per transformer type
numeric_imputer = ColumnTransformer(
    fields=["age", "income"],
    parameters={"strategy": "median"},
)
location_hasher = ColumnTransformer(
    fields=["city", "state"],
    parameters={"number_of_bits": 10},
)

# Explicit column purposes passed alongside the transformers
declared_column_types = {
    "age": "Numeric",
    "income": "Numeric",
    "category": "Categorical",
    "date_joined": "DateTime",
}

# Configure column transformers
classification_job.set_featurization(
    mode="custom",
    transformer_params={
        "imputer": [numeric_imputer],
        "hash_one_hot_encoder": [location_hasher],
    },
    blocked_transformers=["LabelEncoder"],  # Don't use label encoding
    column_name_and_types=declared_column_types,
)

Analyzing AutoML Results

# pandas is needed for the summary table below
import pandas as pd

# Get job results
automl_job = ml_client.jobs.get(returned_job.name)

# Get all child runs
child_runs = ml_client.jobs.list(parent_job_name=automl_job.name)

# Keep one summary row per completed child run
results = [
    {
        "algorithm": run.properties.get("run_algorithm"),
        "accuracy": run.properties.get("accuracy"),
        "f1_score": run.properties.get("f1_score_weighted"),
        "duration": run.properties.get("duration"),
    }
    for run in child_runs
    if run.status == "Completed"
]

# Explicit columns keep the frame sortable even when no run has completed
results_df = pd.DataFrame(
    results,
    columns=["algorithm", "accuracy", "f1_score", "duration"],
)
# Property values may be strings or missing; coerce so the ranking is numeric
# (unparseable or absent values become NaN and sort last).
results_df["accuracy"] = pd.to_numeric(results_df["accuracy"], errors="coerce")
results_df = results_df.sort_values("accuracy", ascending=False)
print("Top Models:")
print(results_df.head(10))

Get Best Model

# Get best model from AutoML
best_run_id = automl_job.properties.get("best_child_run_id")
if not best_run_id:
    # Fail with a clear message instead of handing None to jobs.get()
    raise RuntimeError(
        f"AutoML job {automl_job.name} does not report a best child run"
    )
best_run = ml_client.jobs.get(best_run_id)

print(f"Best model: {best_run.properties.get('run_algorithm')}")
print(f"Accuracy: {best_run.properties.get('accuracy')}")

# Download best model
ml_client.jobs.download(
    name=best_run_id,
    output_name="best_model",
    download_path="./outputs"
)

# Register best model
from azure.ai.ml.entities import Model

# NOTE(review): jobs.download may place named outputs under
# <download_path>/named-outputs/<output_name> — verify the local folder
# layout and adjust `path` if the model is not found here.
model = Model(
    path="./outputs/best_model",
    name="automl-best-model",
    description=f"Best model from AutoML run {automl_job.name}",
    type="mlflow_model"
)

ml_client.models.create_or_update(model)

Model Explanations

# Enable model explanations
classification_job = automl.classification(
    # ... settings
)

# Ask AutoML to compute explanations for the trained models
classification_job.set_training(enable_model_explainability=True)

# After job completes, get explanations
from azure.ai.ml.entities import Model

# Download explanation artifacts
explanation_download = {
    "name": best_run_id,
    "output_name": "explanations",
    "download_path": "./explanations",
}
ml_client.jobs.download(**explanation_download)

# Load and analyze
import json

# JSON is UTF-8 by specification; be explicit so the platform default
# encoding cannot mis-decode non-ASCII feature names.
with open("./explanations/global_importance.json", encoding="utf-8") as f:
    importance = json.load(f)

# Print the ten features with the highest importance, largest first
top_features = sorted(importance.items(), key=lambda item: item[1], reverse=True)[:10]
for feature, imp in top_features:
    print(f"{feature}: {imp:.4f}")

AutoML in Pipelines

from azure.ai.ml import dsl

@dsl.pipeline(
    name="automl_pipeline",
    compute="cpu-cluster"
)
def automl_training_pipeline(training_data: Input, test_data: Input):
    """Train a classifier with AutoML and evaluate the best model.

    Args:
        training_data: Input passed to the AutoML classification node.
        test_data: Input passed to the evaluation step.

    Returns:
        A dict exposing the AutoML node's ``best_model`` output as ``model``
        and the evaluation step's ``metrics`` output as ``metrics``.
    """
    # Local import: Output is only needed to declare the node's named output.
    from azure.ai.ml import Output

    # AutoML training step
    automl_step = automl.classification(
        training_data=training_data,
        target_column_name="target",
        primary_metric="accuracy",
        # Declare the output explicitly so downstream nodes can bind to it
        outputs={"best_model": Output(type="mlflow_model")},
    )
    # The run budget is configured on the node, not in the factory call
    automl_step.set_limits(timeout_minutes=60)

    # Evaluation step
    # NOTE(review): evaluate_model is not defined in this snippet — presumably
    # a component loaded elsewhere; confirm before running.
    eval_step = evaluate_model(
        model=automl_step.outputs.best_model,
        test_data=test_data
    )

    return {
        "model": automl_step.outputs.best_model,
        "metrics": eval_step.outputs.metrics
    }

# Build the pipeline with its two data inputs, then submit it
automl_pipeline = automl_training_pipeline(
    training_data=Input(path="azureml://datastores/workspaceblobstore/paths/train.csv"),
    test_data=Input(path="azureml://datastores/workspaceblobstore/paths/test.csv"),
)
pipeline_job = ml_client.jobs.create_or_update(
    automl_pipeline,
    experiment_name="automl-pipeline",
)

AutoML accelerates model development while maintaining production-quality results.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.