1 min read
Fairness Assessment for Machine Learning Models
This post shares practical, production-minded guidance on assessing and mitigating fairness issues in machine learning models, using Fairlearn and Azure ML.
Understanding Fairness Metrics
Key fairness metrics include:
- Demographic Parity: Equal positive prediction rates across groups
- Equalized Odds: Equal true positive and false positive rates across groups
- Equal Opportunity: Equal true positive rates across groups
- Predictive Parity: Equal precision across groups
Setting Up Fairness Analysis
from fairlearn.metrics import (
MetricFrame,
selection_rate,
demographic_parity_difference,
demographic_parity_ratio,
equalized_odds_difference
)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
# --- Data ------------------------------------------------------------------
# Read the lending dataset, then split it into the target, the sensitive
# attributes, and the remaining feature columns.
df = pd.read_csv("lending_data.csv")
y = df["approved"]
sensitive_features = df[["gender", "race"]]
# The target and both sensitive columns are excluded from the model inputs.
X = df.drop(columns=["approved", "gender", "race"])

# --- Predictions -----------------------------------------------------------
# NOTE(review): `model` is not defined in this snippet; presumably an
# already-fitted classifier created earlier - confirm.
y_pred = model.predict(X)

# --- Per-group metrics -----------------------------------------------------
# Every metric listed here is evaluated separately for each gender group.
metrics = dict(
    accuracy=accuracy_score,
    precision=precision_score,
    recall=recall_score,
    selection_rate=selection_rate,
)
gender_column = sensitive_features["gender"]
metric_frame = MetricFrame(
    metrics=metrics,
    y_true=y,
    y_pred=y_pred,
    sensitive_features=gender_column,
)

# --- Report ----------------------------------------------------------------
per_group_table = metric_frame.by_group
between_group_gaps = metric_frame.difference(method="between_groups")
print("Metrics by Gender:")
print(per_group_table)
print("\nDifferences:")
print(between_group_gaps)
Visualizing Fairness
import matplotlib.pyplot as plt
from fairlearn.metrics import plot_model_comparison
# One 2x2 figure: the top row shows gender, the bottom row shows race;
# the left column is selection rate, the right column is accuracy.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Same metrics as the gender frame, grouped by race instead.
metric_frame_race = MetricFrame(
    metrics=metrics,
    y_true=y,
    y_pred=y_pred,
    sensitive_features=sensitive_features["race"],
)

# (frame, metric column, target axes, panel title) for each of the four panels,
# listed in the order they are drawn.
panels = [
    (metric_frame, "selection_rate", axes[0, 0], "Selection Rate by Gender"),
    (metric_frame, "accuracy", axes[0, 1], "Accuracy by Gender"),
    (metric_frame_race, "selection_rate", axes[1, 0], "Selection Rate by Race"),
    (metric_frame_race, "accuracy", axes[1, 1], "Accuracy by Race"),
]
for frame, metric_name, panel_axes, panel_title in panels:
    frame.by_group[metric_name].plot(kind="bar", ax=panel_axes, title=panel_title)

plt.tight_layout()
plt.savefig("fairness_analysis.png")
Fairness Constraints in Training
from fairlearn.reductions import (
ExponentiatedGradient,
DemographicParity,
EqualizedOdds
)
from sklearn.linear_model import LogisticRegression
# Plain logistic regression is the learner that the reduction wraps.
base_estimator = LogisticRegression(max_iter=1000)

# Wrap the base learner in an ExponentiatedGradient reduction configured
# with a demographic-parity constraint.
mitigator = ExponentiatedGradient(
    estimator=base_estimator,
    constraints=DemographicParity(),
)

# Gender is the sensitive attribute used for both training and evaluation.
gender_groups = sensitive_features["gender"]
mitigator.fit(X, y, sensitive_features=gender_groups)
y_pred_mitigated = mitigator.predict(X)

# Print the same two figures for the original and the mitigated predictions.
for heading, predictions in (
    ("Original Model:", y_pred),
    ("\nMitigated Model:", y_pred_mitigated),
):
    model_accuracy = accuracy_score(y, predictions)
    parity_gap = demographic_parity_difference(
        y, predictions, sensitive_features=gender_groups
    )
    print(heading)
    print(f" Accuracy: {model_accuracy:.3f}")
    print(f" Demographic Parity Difference: {parity_gap:.3f}")
Post-Processing Mitigation
from fairlearn.postprocessing import ThresholdOptimizer
# Post-process the existing model's predictions under an equalized-odds
# constraint; prefit=True is passed, so `model` is handed over as-is.
# NOTE(review): `model` is not defined in this snippet - presumably the
# fitted classifier used for `y_pred`; confirm.
optimizer_settings = {
    "estimator": model,
    "constraints": "equalized_odds",
    "objective": "balanced_accuracy_score",
    "prefit": True,
}
postprocess = ThresholdOptimizer(**optimizer_settings)

# The sensitive attribute is supplied both when fitting and when predicting.
gender_groups = sensitive_features["gender"]
postprocess.fit(X, y, sensitive_features=gender_groups)
y_pred_fair = postprocess.predict(X, sensitive_features=gender_groups)

# Report accuracy and the remaining equalized-odds gap.
fair_accuracy = accuracy_score(y, y_pred_fair)
fair_odds_gap = equalized_odds_difference(
    y, y_pred_fair, sensitive_features=gender_groups
)
print("\nPost-processed Model:")
print(f" Accuracy: {fair_accuracy:.3f}")
print(f" Equalized Odds Difference: {fair_odds_gap:.3f}")
Fairness Dashboard in Azure ML
from raiwidgets import FairnessDashboard
# Open the dashboard with both sets of predictions so the original and the
# mitigated model can be compared; the dict keys are the labels passed for
# each prediction set.
predictions_by_model = {"Original": y_pred, "Mitigated": y_pred_mitigated}
FairnessDashboard(
    sensitive_features=sensitive_features,
    y_true=y,
    y_pred=predictions_by_model,
)
Integration with Azure ML Pipeline
from azure.ai.ml import dsl, Input, Output
from azure.ai.ml.entities import Environment
@dsl.pipeline(
    compute="cpu-cluster",
    description="Fairness Assessment Pipeline",
)
def fairness_pipeline(input_data: Input, model: Input):
    """Assess a model's fairness, then produce a bias-mitigated model.

    Args:
        input_data: Pipeline input forwarded as ``data`` to both steps.
        model: Pipeline input forwarded as ``model`` to both steps.

    Returns:
        A dict with two entries: ``"fairness_report"`` (the assessment
        step's ``report`` output) and ``"mitigated_model"`` (the mitigation
        step's ``model`` output).
    """
    # NOTE(review): `assess_fairness` and `mitigate_bias` are not defined in
    # this snippet; presumably pipeline components loaded elsewhere - confirm.
    # The step variable names are kept unchanged on purpose, in case the
    # pipeline DSL derives node names from them.

    # Step 1: assessment over gender and race.
    fairness_assessment = assess_fairness(
        data=input_data,
        model=model,
        sensitive_features=["gender", "race"],
        metrics=["selection_rate", "accuracy", "recall"],
    )

    # Step 2: mitigation with respect to gender only.
    mitigated_model = mitigate_bias(
        model=model,
        data=input_data,
        sensitive_features=["gender"],
        constraint="demographic_parity",
    )

    pipeline_outputs = {
        "fairness_report": fairness_assessment.outputs.report,
        "mitigated_model": mitigated_model.outputs.model,
    }
    return pipeline_outputs
Fairness Monitoring in Production
import logging
from datetime import datetime
class FairnessMonitor:
    """Track selection-rate disparity of a model across sensitive groups.

    Each call to :meth:`assess` computes, for every monitored sensitive
    feature, the between-group difference in selection rate, compares it with
    that feature's threshold, logs a warning when the threshold is exceeded,
    and appends a timestamped record to ``history``.
    """

    # Threshold applied to a feature that has no entry in ``thresholds``.
    _DEFAULT_THRESHOLD = 0.1
    _SECONDS_PER_DAY = 86400

    def __init__(self, model, sensitive_features, thresholds):
        """Store the monitored model and its fairness configuration.

        Args:
            model: Object exposing ``predict(X)``.
            sensitive_features: Iterable of feature names to monitor.
            thresholds: Dict mapping a feature name to its maximum accepted
                disparity; features without an entry use 0.1.
        """
        self.model = model
        self.sensitive_features = sensitive_features
        self.thresholds = thresholds
        # One record per assess() call:
        # {"timestamp": <ISO-8601 naive UTC string>, "results": {...}}.
        self.history = []

    @staticmethod
    def _utc_now():
        """Return the current UTC time as a naive ``datetime``."""
        # Function-scope import: the surrounding file only imports `datetime`.
        from datetime import timezone

        # Equivalent to the deprecated datetime.utcnow(); the result stays
        # naive so stored timestamps keep their original string format.
        return datetime.now(timezone.utc).replace(tzinfo=None)

    def assess(self, X, y_true, sensitive_data):
        """Assess fairness on new data.

        Args:
            X: Feature data passed to ``self.model.predict``.
            y_true: Ground-truth labels matching ``X``.
            sensitive_data: Object indexable by feature name that yields the
                group membership for each row (presumably a DataFrame -
                verify against the caller).

        Returns:
            Dict mapping each monitored feature to
            ``{"disparity": float, "threshold_exceeded": bool}``. The same
            dict object is also stored in ``history``.
        """
        y_pred = self.model.predict(X)
        results = {}
        for feature in self.sensitive_features:
            metric_frame = MetricFrame(
                metrics={"selection_rate": selection_rate},
                y_true=y_true,
                y_pred=y_pred,
                sensitive_features=sensitive_data[feature],
            )
            # Cast to built-in float/bool so history records hold plain Python
            # values regardless of the scalar type MetricFrame returns.
            disparity = float(
                metric_frame.difference(method="between_groups")["selection_rate"]
            )
            threshold = self.thresholds.get(feature, self._DEFAULT_THRESHOLD)
            threshold_exceeded = bool(disparity > threshold)
            results[feature] = {
                "disparity": disparity,
                "threshold_exceeded": threshold_exceeded,
            }
            if threshold_exceeded:
                # Lazy %-formatting: the message is only built if it is emitted.
                logging.warning(
                    "Fairness threshold exceeded for %s: %.3f", feature, disparity
                )
        self.history.append({
            "timestamp": self._utc_now().isoformat(),
            "results": results,
        })
        return results

    def get_trend(self, feature, days=30):
        """Get fairness trend over time.

        Args:
            feature: Name of the sensitive feature to report on.
            days: Size of the look-back window in days.

        Returns:
            List of disparity values, in recording order, for history records
            whose timestamp is at most ``days`` days old.

        Raises:
            KeyError: If a record in the window has no entry for ``feature``.
        """
        # Take "now" once so every record is measured against the same instant.
        now = self._utc_now()
        # Compare the full elapsed time: timedelta.days floors, which would
        # let records up to one extra day old slip into the window.
        max_age_seconds = days * self._SECONDS_PER_DAY
        return [
            record["results"][feature]["disparity"]
            for record in self.history
            if (now - datetime.fromisoformat(record["timestamp"])).total_seconds()
            <= max_age_seconds
        ]
Fairness assessment helps ensure your ML models treat all groups equitably and comply with ethical standards.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.