MLflow in Fabric: Experiment Tracking and Model Management
MLflow is deeply integrated into Fabric for experiment tracking and model management. Today we’ll explore MLflow features and best practices in Fabric.
MLflow in Fabric Overview
# MLflow capabilities in Fabric
mlflow_features = {
    "experiment_tracking": "Log parameters, metrics, artifacts",
    "model_registry": "Version and manage models",
    "autologging": "Automatic parameter/metric capture",
    "ui": "Visual experiment comparison",
    "api": "Programmatic access to all features"
}
# MLflow is pre-configured in Fabric notebooks
import mlflow
print(f"Tracking URI: {mlflow.get_tracking_uri()}")
print(f"MLflow version: {mlflow.__version__}")
Experiment Management
Creating and Managing Experiments
import mlflow
# Create new experiment
experiment_name = "customer_churn_prediction"
mlflow.set_experiment(experiment_name)
# Get experiment info
experiment = mlflow.get_experiment_by_name(experiment_name)
print(f"Experiment ID: {experiment.experiment_id}")
print(f"Artifact Location: {experiment.artifact_location}")
# List all experiments
experiments = mlflow.search_experiments()
for exp in experiments:
    print(f"{exp.name}: {exp.experiment_id}")
Organizing with Tags
# Set experiment-level tags
mlflow.set_experiment_tag("team", "data_science")
mlflow.set_experiment_tag("project", "customer_analytics")
mlflow.set_experiment_tag("owner", "michael.pena")
# Search experiments by tag
experiments = mlflow.search_experiments(
    filter_string="tags.team = 'data_science'"
)
Comprehensive Run Tracking
Logging Parameters
with mlflow.start_run(run_name="experiment_v1"):
    # Log individual parameters
    mlflow.log_param("learning_rate", 0.01)
    mlflow.log_param("n_estimators", 100)
    # Log multiple parameters at once
    params = {
        "max_depth": 10,
        "min_samples_split": 5,
        "criterion": "gini",
        "random_state": 42
    }
    mlflow.log_params(params)
    # MLflow params are flat key/value pairs, so log a nested config as a string
    model_config = {
        "model": {
            "type": "random_forest",
            "hyperparameters": {
                "n_estimators": 100
            }
        }
    }
    mlflow.log_params({"config": str(model_config)})
Logging Metrics
with mlflow.start_run():
    # Log single metrics
    mlflow.log_metric("accuracy", 0.95)
    mlflow.log_metric("precision", 0.93)
    # Log a metric with a step to capture training curves
    # (train_one_epoch() is a placeholder for your own training step)
    for epoch in range(100):
        loss = train_one_epoch()
        mlflow.log_metric("training_loss", loss, step=epoch)
    # Log multiple metrics at once
    metrics = {
        "test_accuracy": 0.94,
        "test_precision": 0.92,
        "test_recall": 0.91,
        "test_f1": 0.915
    }
    mlflow.log_metrics(metrics)
Logging Artifacts
import matplotlib.pyplot as plt
import json
with mlflow.start_run():
    # Log figure
    fig, ax = plt.subplots()
    ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
    ax.set_title("Training Progress")
    mlflow.log_figure(fig, "training_progress.png")
    plt.close(fig)
    # Log file
    with open("/tmp/feature_list.json", "w") as f:
        json.dump({"features": ["col1", "col2", "col3"]}, f)
    mlflow.log_artifact("/tmp/feature_list.json")
    # Log directory
    mlflow.log_artifacts("/tmp/model_outputs/", artifact_path="outputs")
    # Log text
    mlflow.log_text("Model trained successfully", "training_notes.txt")
    # Log dictionary as JSON
    mlflow.log_dict({"config": "value"}, "config.json")
Autologging
# Enable autologging for common frameworks
# Scikit-learn
mlflow.sklearn.autolog()
# PyTorch
mlflow.pytorch.autolog()
# TensorFlow/Keras
mlflow.tensorflow.autolog()
# XGBoost
mlflow.xgboost.autolog()
# LightGBM
mlflow.lightgbm.autolog()
# Example with autolog
mlflow.sklearn.autolog(
    log_input_examples=True,
    log_model_signatures=True,
    log_models=True,
    log_post_training_metrics=True
)
from sklearn.ensemble import RandomForestClassifier
# Just train - everything is logged automatically!
model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)
# Disable after use
mlflow.sklearn.autolog(disable=True)
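Autologging writes to the currently active run, so you can still wrap training in an explicit mlflow.start_run() to add your own tags and metrics alongside the autologged values. A minimal sketch (the run name, tag, and extra metric below are illustrative, and X_test/y_test are assumed to be your hold-out split):
# Combine autologging with manual logging in the same run
mlflow.sklearn.autolog()
with mlflow.start_run(run_name="rf_autolog_plus_custom"):
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    # Anything logged here lands in the same run as the autologged parameters/metrics
    mlflow.set_tag("data_version", "2023-07-25")
    mlflow.log_metric("holdout_accuracy", model.score(X_test, y_test))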
Searching and Comparing Runs
# Search runs with filters
runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="metrics.accuracy > 0.9 AND params.n_estimators = '100'",
    order_by=["metrics.accuracy DESC"]
)
print(runs[['run_id', 'params.n_estimators', 'metrics.accuracy']].head())
# Get best run
best_run = runs.iloc[0]
print(f"Best run ID: {best_run.run_id}")
print(f"Best accuracy: {best_run['metrics.accuracy']}")
# Load best model
best_model = mlflow.sklearn.load_model(f"runs:/{best_run.run_id}/model")
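If the best run holds up, you can promote its model to the registry directly from the run URI; the registered name below is illustrative:
# Register the best run's model so it gets a version in the model registry
result = mlflow.register_model(
    model_uri=f"runs:/{best_run.run_id}/model",
    name="my_production_model"
)
print(f"Registered as version {result.version}")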
Nested Runs
# Parent run for experiment
with mlflow.start_run(run_name="hyperparameter_search") as parent:
best_score = 0
best_params = None
# Child runs for each configuration
for lr in [0.01, 0.1, 0.5]:
for depth in [5, 10, 15]:
with mlflow.start_run(run_name=f"lr={lr}_depth={depth}", nested=True):
params = {"learning_rate": lr, "max_depth": depth}
mlflow.log_params(params)
# Train and evaluate
model = train_model(X_train, y_train, **params)
score = evaluate_model(model, X_test, y_test)
mlflow.log_metric("score", score)
if score > best_score:
best_score = score
best_params = params
# Log best results to parent
mlflow.log_params({"best_" + k: v for k, v in best_params.items()})
mlflow.log_metric("best_score", best_score)
Custom Model Logging
# Log model with custom signature
from mlflow.models import infer_signature
with mlflow.start_run():
    model = train_model(X_train, y_train)
    # Infer signature from data
    signature = infer_signature(X_train, model.predict(X_train))
    # Log with signature and input example
    mlflow.sklearn.log_model(
        model,
        artifact_path="model",
        signature=signature,
        input_example=X_train[:5],
        registered_model_name="my_production_model"
    )
Model Registry Operations
from mlflow import MlflowClient
client = MlflowClient()
# List registered models
for model in client.search_registered_models():
    print(f"Model: {model.name}")
# Get model details
model_details = client.get_registered_model("my_production_model")
print(f"Latest versions: {[v.version for v in model_details.latest_versions]}")
# Compare model versions
versions = client.search_model_versions("name='my_production_model'")
for v in versions:
    run = mlflow.get_run(v.run_id)
    print(f"Version {v.version}: accuracy={run.data.metrics.get('accuracy')}")
Best Practices
best_practices = {
    "naming": {
        "experiments": "project_task_version (e.g., churn_classification_v2)",
        "runs": "descriptive_unique_identifier"
    },
    "organization": {
        "tags": "Use for filtering and grouping",
        "artifacts": "Organize in subdirectories"
    },
    "reproducibility": {
        "log_code": "Always log training code as artifact",
        "log_data": "Log data version or hash",
        "set_seed": "Log random seed for reproducibility"
    },
    "efficiency": {
        "batch_logging": "Use log_params/log_metrics for multiple values",
        "autolog": "Enable for standard frameworks",
        "cleanup": "Delete failed/test runs"
    }
}
# Reproducibility example
with mlflow.start_run():
    # Log git commit
    import subprocess
    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
    mlflow.set_tag("git_commit", git_hash)
    # Log data version
    mlflow.log_param("data_version", "2023-07-25")
    mlflow.log_param("data_rows", len(X_train))
    # Log training code
    mlflow.log_artifact("train.py")
Tomorrow we’ll explore experiment tracking patterns in detail.