1 min read
Error Analysis for Machine Learning Models
I wrote “Error Analysis for Machine Learning Models” to share practical, production-minded guidance on this topic.
Understanding Error Analysis
Error analysis identifies:
- Cohorts with high error rates
- Root causes of model failures
- Patterns in misclassifications
- Data quality issues
Setting Up Error Analysis
from raiwidgets import ErrorAnalysisDashboard
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
# Load the customer dataset and separate the features from the churn label.
df = pd.read_csv("customer_data.csv")
X = df.drop(columns="churn")
y = df["churn"]

# Seed the split so the held-out rows (and every error cohort derived from
# them) are the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the model. The seed keeps the forest, and therefore its errors,
# reproducible.
# NOTE(review): "contract_type" and "payment_method" are declared categorical
# below; if they are stored as strings they presumably need encoding before
# RandomForestClassifier can fit on them -- confirm against the dataset.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Launch the Error Analysis dashboard on the held-out split. No explanation
# object has been computed, so that optional argument is simply left out.
ErrorAnalysisDashboard(
    model=model,
    dataset=X_test,
    true_y=y_test,
    categorical_features=["contract_type", "payment_method"],
    features=X_test.columns.tolist(),
)
Error Tree Analysis
from erroranalysis import ModelAnalyzer
from erroranalysis._internal.constants import ModelTask
# Create the analyzer over the held-out split.
analyzer = ModelAnalyzer(
    model=model,
    dataset=X_test,
    true_y=y_test,
    feature_names=X_test.columns.tolist(),
    categorical_features=["contract_type", "payment_method"],
    model_task=ModelTask.CLASSIFICATION,
)

# Build the error tree. The features to split on and the (here unused) cohort
# filters are passed explicitly: every feature is a split candidate and no
# rows are filtered out before the tree is fitted.
tree = analyzer.compute_error_tree(
    features=X_test.columns.tolist(),
    filters=None,
    composite_filters=None,
    max_depth=4,
    num_leaves=31,
    min_child_samples=20,
)
# Analyze tree nodes
def print_tree(node, depth=0):
    """Print an error tree in pre-order, one line per node.

    Each line is prefixed with one space per level of depth. Leaves show
    their error rate and sample count; internal nodes show the split feature
    and threshold, followed by the left subtree and then the right subtree.

    Args:
        node: Tree node exposing ``is_leaf`` and, for leaves, ``error_rate``
            and ``size``; otherwise ``feature``, ``threshold``, ``left`` and
            ``right``.
        depth: Nesting level of ``node``; callers normally leave it at 0.
    """
    pad = depth * " "
    if not node.is_leaf:
        # Internal node: describe the split, then walk both children.
        print(f"{pad}Split on {node.feature} at {node.threshold}")
        print_tree(node.left, depth + 1)
        print_tree(node.right, depth + 1)
        return
    print(f"{pad}Leaf: {node.error_rate:.2%} error, {node.size} samples")
# Print the whole tree starting from its root.
# NOTE(review): this assumes the object returned by compute_error_tree exposes
# a `.root` node with the attributes print_tree reads (is_leaf, left, right,
# ...) -- confirm against the erroranalysis API before relying on it.
print_tree(tree.root)
Identifying Error Cohorts
import numpy as np
from sklearn.metrics import classification_report
class ErrorCohortAnalyzer:
    """Slice a model's errors into single-feature cohorts.

    The model is asked for predictions once, at construction time; every
    cohort statistic is then derived from those cached predictions.
    """

    def __init__(self, model, X, y):
        """Cache predictions and per-row error flags.

        Args:
            model: Object with a ``predict(X)`` method.
            X: Feature table; columns are looked up by name.
            y: True labels, row-aligned with ``X``.
        """
        self.model = model
        self.X = X
        self.y = y
        self.predictions = model.predict(X)
        # True wherever the prediction disagrees with the label.
        self.errors = self.predictions != y

    @staticmethod
    def _is_categorical(column):
        """Return True when *column* should be split by value, not binned."""
        # Booleans count as numeric for pandas, but percentile bins over
        # True/False are meaningless, so they are split by value instead.
        return (
            pd.api.types.is_bool_dtype(column)
            or not pd.api.types.is_numeric_dtype(column)
        )

    @staticmethod
    def _numeric_bins(feature, column, bins):
        """Return ``(condition, mask)`` pairs for percentile bins of *column*."""
        observed = column.dropna()
        if observed.empty:
            return []
        # Percentiles are taken over non-missing values only; a single NaN
        # would otherwise turn every edge into NaN and drop the feature.
        edges = np.percentile(observed, np.linspace(0, 100, bins + 1))
        last = len(edges) - 2
        candidates = []
        for i, (low, high) in enumerate(zip(edges[:-1], edges[1:])):
            if i == last:
                # The final bin is closed on the right so that rows equal to
                # the maximum are not silently excluded from every cohort.
                condition = f"{feature} in [{low:.2f}, {high:.2f}]"
                mask = (column >= low) & (column <= high)
            else:
                condition = f"{feature} in [{low:.2f}, {high:.2f})"
                mask = (column >= low) & (column < high)
            candidates.append((condition, mask))
        return candidates

    def find_high_error_cohorts(self, features, bins=10):
        """Find cohorts with high error rates.

        Categorical (non-numeric or boolean) features yield one cohort per
        distinct non-missing value. Numeric features are cut into ``bins``
        percentile ranges. Cohorts that select no rows are skipped.

        Args:
            features: Iterable of column names in ``X`` to analyse.
            bins: Number of percentile bins for numeric features.

        Returns:
            List of dicts with keys ``feature``, ``condition``,
            ``error_rate``, ``size`` and ``mask``, sorted by descending
            error rate.
        """
        cohorts = []
        for feature in features:
            column = self.X[feature]
            if self._is_categorical(column):
                # Missing values are dropped: ``column == NaN`` matches no
                # row, and the resulting NaN error rate would break the sort.
                candidates = [
                    (f"{feature} == {value}", column == value)
                    for value in column.dropna().unique()
                ]
            else:
                candidates = self._numeric_bins(feature, column, bins)

            for condition, mask in candidates:
                size = mask.sum()
                if size == 0:
                    continue
                cohorts.append({
                    'feature': feature,
                    'condition': condition,
                    'error_rate': self.errors[mask].mean(),
                    'size': size,
                    'mask': mask,
                })

        # Worst cohorts first.
        cohorts.sort(key=lambda cohort: cohort['error_rate'], reverse=True)
        return cohorts

    def analyze_cohort(self, mask):
        """Return detailed metrics for the rows selected by *mask*.

        Args:
            mask: Boolean row selector, e.g. the ``mask`` entry of a cohort
                returned by :meth:`find_high_error_cohorts`.

        Returns:
            Dict with the cohort ``size``, its ``error_rate`` and a
            ``report`` holding ``classification_report`` output as a dict.
        """
        y_true = self.y[mask]
        y_pred = self.predictions[mask]
        return {
            'size': mask.sum(),
            'error_rate': (y_pred != y_true).mean(),
            'report': classification_report(y_true, y_pred, output_dict=True),
        }
# Usage: rank the cohorts of every feature on the held-out split, then list
# the five with the highest error rate.
analyzer = ErrorCohortAnalyzer(model, X_test, y_test)
high_error_cohorts = analyzer.find_high_error_cohorts(X_test.columns)

print("Top 5 High-Error Cohorts:")
for worst in high_error_cohorts[:5]:
    condition = worst['condition']
    rate = worst['error_rate']
    size = worst['size']
    print(f" {condition}: {rate:.2%} error ({size} samples)")
Confusion Matrix Analysis
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
def analyze_confusion_by_cohort(model, X, y, feature, value,
                                output_path="confusion_comparison.png"):
    """Plot the overall confusion matrix next to one cohort's and save it.

    The cohort is ``feature >= value`` for numeric (non-boolean) columns and
    ``feature == value`` for everything else (object, string, category,
    bool).

    Args:
        model: Fitted classifier with a ``predict(X)`` method.
        X: Feature table containing ``feature``.
        y: True labels, row-aligned with ``X``.
        feature: Column that defines the cohort.
        value: Threshold (numeric columns) or category to match.
        output_path: Where the comparison figure is written.

    Raises:
        ValueError: If no row of ``X`` belongs to the cohort.
    """
    column = X[feature]
    # Only genuinely numeric columns support a meaningful ">=" comparison;
    # category and string dtypes must be matched by equality.
    is_threshold = (
        pd.api.types.is_numeric_dtype(column)
        and not pd.api.types.is_bool_dtype(column)
    )
    mask = (column >= value) if is_threshold else (column == value)
    if not mask.any():
        raise ValueError(f"No rows match cohort {feature!r} / {value!r}")

    # Predict once and slice, instead of running the model a second time on
    # the cohort rows.
    y_pred_all = model.predict(X)
    y_true = y[mask]
    y_pred = y_pred_all[mask.to_numpy()]

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    try:
        # Overall confusion matrix
        ConfusionMatrixDisplay.from_predictions(y, y_pred_all, ax=axes[0])
        axes[0].set_title("Overall")

        # Cohort confusion matrix; the title states the comparison that was
        # actually applied.
        ConfusionMatrixDisplay.from_predictions(y_true, y_pred, ax=axes[1])
        relation = ">=" if is_threshold else "="
        axes[1].set_title(f"Cohort: {feature} {relation} {value}")

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
# Compare the "month-to-month" contract cohort against the full test split;
# the side-by-side figure is written to confusion_comparison.png.
analyze_confusion_by_cohort(model, X_test, y_test, "contract_type", "month-to-month")
Error Pattern Detection
class ErrorPatternDetector:
    """Classify a model's mistakes by how confident the model was.

    Predictions and class probabilities are computed once at construction.
    """

    def __init__(self, model, X, y):
        """Cache predictions and class probabilities for ``X``.

        Args:
            model: Object with ``predict(X)`` and ``predict_proba(X)``.
            X: Feature table passed to the model.
            y: True labels, row-aligned with ``X`` (Series, array or list).
        """
        self.model = model
        self.X = X
        self.y = y
        self.predictions = model.predict(X)
        self.probabilities = model.predict_proba(X)

    def _errors_where(self, confidence_filter):
        """Tabulate misclassified rows whose confidence passes the filter.

        ``confidence_filter`` receives the per-row maximum class probability
        and returns a boolean array of the same length.
        """
        # Work on plain arrays so the comparison is positional and ``y`` may
        # be any array-like, not only a pandas Series.
        y_true = np.asarray(self.y)
        y_pred = np.asarray(self.predictions)
        max_probs = np.max(np.asarray(self.probabilities), axis=1)
        selected = np.where((y_pred != y_true) & confidence_filter(max_probs))[0]
        return pd.DataFrame({
            'index': selected,  # positional row number within X / y
            'true_label': y_true[selected],
            'predicted': y_pred[selected],
            'confidence': max_probs[selected],
        })

    def find_confident_errors(self, threshold=0.9):
        """Find high-confidence incorrect predictions.

        Args:
            threshold: Minimum top-class probability (inclusive).

        Returns:
            DataFrame with columns ``index``, ``true_label``, ``predicted``
            and ``confidence``, sorted by descending confidence.
        """
        table = self._errors_where(lambda probs: probs >= threshold)
        return table.sort_values('confidence', ascending=False)

    def find_boundary_errors(self, threshold=0.55):
        """Find errors near the decision boundary.

        Args:
            threshold: Maximum top-class probability (inclusive).

        Returns:
            DataFrame with columns ``index``, ``true_label``, ``predicted``
            and ``confidence``, in row order.
        """
        return self._errors_where(lambda probs: probs <= threshold)
# Usage: count the mistakes the model was very sure about, then the mistakes
# it made close to the decision boundary.
detector = ErrorPatternDetector(model, X_test, y_test)

confident_errors = detector.find_confident_errors(threshold=0.9)
confident_count = len(confident_errors)
print(f"Found {confident_count} high-confidence errors")

boundary_errors = detector.find_boundary_errors(threshold=0.55)
boundary_count = len(boundary_errors)
print(f"Found {boundary_count} boundary errors")
Actionable Insights
def generate_error_report(analyzer, top_n=5, error_threshold=0.3):
    """Generate an actionable error analysis report in Markdown.

    Args:
        analyzer: Cohort analyzer exposing an ``X`` feature table and a
            ``find_high_error_cohorts(features)`` method.
        top_n: How many of the worst cohorts to consider.
        error_threshold: A cohort is reported only when its error rate is
            strictly above this value (0.3 means 30%).

    Returns:
        The report as a single newline-joined string.
    """
    report = ["## High-Error Cohorts\n"]

    # Analyse the columns of the analyzer's own data rather than a global
    # DataFrame, so the report always matches the analyzer it was given.
    cohorts = analyzer.find_high_error_cohorts(analyzer.X.columns)
    for cohort in cohorts[:top_n]:
        if cohort['error_rate'] > error_threshold:
            report.append(f"- **{cohort['condition']}**: {cohort['error_rate']:.1%} error rate")
            report.append(" - Recommendation: Collect more training data for this segment")

    # Generic model improvement suggestions, always included.
    report.extend([
        "\n## Recommendations\n",
        "1. Review feature engineering for high-error cohorts",
        "2. Consider separate models for distinct data segments",
        "3. Investigate data quality issues in error-prone regions",
    ])
    return "\n".join(report)
# `analyzer` here is the ErrorCohortAnalyzer created in the cohort usage
# snippet, which rebinds the name first used for the ModelAnalyzer.
print(generate_error_report(analyzer))
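Error analysis reveals the weaknesses in your model and guides targeted improvements.

## Takeaways

- Break the overall error rate down by cohort; a single aggregate metric hides the segments where the model fails most.
- Compare each high-error cohort's confusion matrix with the overall one to see which kind of mistake dominates there.
- Review high-confidence errors and near-boundary errors separately.
- Next steps: collect more training data for high-error segments, revisit feature engineering for those cohorts, and investigate data quality in error-prone regions.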