Skip to content
Back to Blog
2 min read

AI Ethics and Responsible AI: Building Trustworthy Systems

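I wrote this post to share practical, production-minded guidance on building responsible AI systems: measuring and mitigating bias, explaining predictions, protecting privacy, filtering unsafe content, and governing models once they are deployed.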

Microsoft’s Responsible AI Principles

Microsoft’s framework provides a solid foundation:

  1. Fairness: AI should treat all people fairly
  2. Reliability & Safety: AI should perform reliably and safely
  3. Privacy & Security: AI should be secure and respect privacy
  4. Inclusiveness: AI should empower everyone
  5. Transparency: AI should be understandable
  6. Accountability: People should be accountable for AI systems

Implementing Fairness

Bias Detection

from fairlearn.metrics import MetricFrame, selection_rate
from sklearn.metrics import accuracy_score, precision_score, recall_score

def assess_model_fairness(y_true, y_pred, sensitive_features):
    """Print and return per-group fairness metrics for a set of predictions.

    Builds a fairlearn ``MetricFrame`` over accuracy, precision, recall and
    selection rate, prints the overall values, the per-group values and the
    between-group differences, then returns the frame for further inspection.
    """
    scorers = {
        'accuracy': accuracy_score,
        'precision': precision_score,
        'recall': recall_score,
        'selection_rate': selection_rate,
    }
    frame = MetricFrame(
        metrics=scorers,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=sensitive_features,
    )

    # Each section is computed lazily, right after its heading is printed.
    report_sections = (
        ("Overall metrics:", lambda: frame.overall),
        ("\nMetrics by group:", lambda: frame.by_group),
        ("\nDisparities:", lambda: frame.difference(method='between_groups')),
    )
    for heading, compute in report_sections:
        print(heading)
        print(compute())

    return frame

# Usage
# NOTE: `test_labels`, `predictions` and `demographics` are not defined in this
# snippet; the surrounding evaluation code must provide them. The 'gender'
# column is the sensitive attribute the metrics are grouped by.
fairness_report = assess_model_fairness(
    y_true=test_labels,
    y_pred=predictions,
    sensitive_features=demographics['gender']
)

Bias Mitigation

from fairlearn.reductions import ExponentiatedGradient, DemographicParity

def train_fair_model(X_train, y_train, sensitive_features, base_estimator):
    """Fit ``base_estimator`` under a demographic-parity constraint.

    The estimator is wrapped in fairlearn's ``ExponentiatedGradient``
    reduction, fitted on the training data together with the sensitive
    features, and the fitted wrapper is returned.
    """
    fair_estimator = ExponentiatedGradient(
        estimator=base_estimator,
        constraints=DemographicParity(),
    )
    fair_estimator.fit(
        X_train,
        y_train,
        sensitive_features=sensitive_features,
    )
    return fair_estimator

Building Transparent AI

Explainability

import shap

def explain_prediction(model, X_sample, feature_names):
    """Explain the model's prediction for the first row of ``X_sample``.

    Returns a dict with three keys: ``'prediction'`` (first element of
    ``model.predict``), ``'feature_contributions'`` (feature name -> SHAP
    value for the first row) and ``'base_value'`` (the explainer's expected
    value).
    """
    tree_explainer = shap.TreeExplainer(model)
    sample_shap_values = tree_explainer.shap_values(X_sample)

    predicted = model.predict(X_sample)[0]
    # NOTE(review): indexing with [0] assumes a 2-D (samples x features)
    # result; some classifiers yield per-class SHAP outputs instead - confirm
    # for the model type in use.
    contributions = dict(zip(feature_names, sample_shap_values[0]))

    return {
        'prediction': predicted,
        'feature_contributions': contributions,
        'base_value': tree_explainer.expected_value,
    }

# Usage: explain one customer and list the features by absolute impact.
explanation = explain_prediction(
    trained_model,
    customer_data,
    ['age', 'income', 'credit_score', 'tenure'],
)

print(f"Prediction: {explanation['prediction']}")
print("Feature contributions:")
for feature, contribution in sorted(
    explanation['feature_contributions'].items(),
    key=lambda item: abs(item[1]),
    reverse=True,
):
    # Anything that is not strictly positive is reported as "decreases".
    if contribution > 0:
        direction = "increases"
    else:
        direction = "decreases"
    print(f"  {feature}: {direction} prediction by {abs(contribution):.3f}")

Model Cards

Document your models:

# model_card.yaml
model_name: Customer Churn Predictor
version: 2.1.0
date: 2022-12-06

model_details:
  description: Predicts customer churn probability
  type: Binary Classification
  architecture: Gradient Boosting (LightGBM)
  training_data: Customer records 2020-2022
  input_features:
    - tenure (months)
    - monthly_charges
    - total_charges
    - contract_type
    - payment_method
  output: Churn probability (0-1)

intended_use:
  primary_use: Identify at-risk customers for retention campaigns
  primary_users: Customer success team
  out_of_scope_uses:
    - Credit decisions
    - Employment decisions
    - Any use affecting individual rights

performance:
  overall_accuracy: 0.85
  auc_roc: 0.89
  by_segment:
    - segment: "Age < 30"
      accuracy: 0.83
    - segment: "Age 30-50"
      accuracy: 0.87
    - segment: "Age > 50"
      accuracy: 0.84

ethical_considerations:
  sensitive_features_used: false
  fairness_evaluation: |
    Model evaluated for disparate impact across age groups.
    No significant disparities found (max difference: 4%).
  known_limitations:
    - May underperform for customers with < 3 months tenure
    - Not validated for business customers

recommendations:
  - Monitor performance monthly for drift
  - Re-evaluate fairness quarterly
  - Human review required for high-value customers

Privacy Protection

Differential Privacy

from diffprivlib.models import GaussianNB

def train_private_model(X_train, y_train, epsilon=1.0, bounds=None):
    """Train a Gaussian naive Bayes model with differential privacy.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        epsilon: Privacy-loss parameter. Lower epsilon means more privacy and
            less accuracy.
        bounds: Optional ``(min, max)`` feature bounds forwarded to
            diffprivlib's ``GaussianNB``. Supply these from domain knowledge,
            not from the training data: when left as ``None`` diffprivlib
            derives the bounds from ``X_train`` itself, which leaks
            information and weakens the privacy guarantee (the library emits
            a ``PrivacyLeakWarning`` in that case).

    Returns:
        The fitted differentially private model.
    """
    model = GaussianNB(epsilon=epsilon, bounds=bounds)
    model.fit(X_train, y_train)

    return model

# Privacy budget management
class PrivacyBudget:
    """Ledger that tracks cumulative epsilon spending against a fixed total."""

    def __init__(self, total_epsilon: float):
        """Create a budget of ``total_epsilon`` with nothing spent yet."""
        self.total_epsilon = total_epsilon
        self.spent_epsilon = 0.0

    def allocate(self, epsilon: float) -> bool:
        """Try to spend ``epsilon`` from the budget.

        Returns:
            True if the amount fit in the remaining budget and was recorded,
            False if it would exceed the total (nothing is recorded).

        Raises:
            ValueError: If ``epsilon`` is negative or NaN. A negative amount
                would silently refund budget, and NaN would poison the ledger.
        """
        import math  # local import: the snippet has no module-level imports

        if math.isnan(epsilon) or epsilon < 0:
            raise ValueError(
                f"epsilon must be a non-negative number, got {epsilon!r}"
            )

        proposed = self.spent_epsilon + epsilon
        over_budget = proposed > self.total_epsilon
        # Float accumulation (e.g. 0.1 + 0.1 + 0.1 vs 0.3) can overshoot the
        # total by rounding noise alone; treat that as an exact fit.
        if over_budget and not math.isclose(
            proposed, self.total_epsilon, rel_tol=1e-9
        ):
            return False

        # Clamp so rounding noise never makes `remaining` negative.
        self.spent_epsilon = min(proposed, self.total_epsilon)
        return True

    @property
    def remaining(self) -> float:
        """Epsilon still available to allocate."""
        return self.total_epsilon - self.spent_epsilon

Data Anonymization

import hashlib
from typing import Dict, Any

def anonymize_record(record: Dict[str, Any], config: dict) -> Dict[str, Any]:
    """Return an anonymized copy of ``record`` according to ``config``.

    ``config`` may contain any of these keys (all optional):

    * ``'remove'``: iterable of field names to drop entirely.
    * ``'hash'``: iterable of field names replaced by the first 16 hex
      characters of the SHA-256 of ``str(value)``.
    * ``'generalize'``: mapping of field name -> callable applied to the value.
    * ``'noise'``: mapping of field name -> standard deviation of zero-mean
      Gaussian noise added to the value.

    Rules are checked in the order above and the first match wins. Fields
    matching no rule are copied unchanged. The input record is not modified.
    """
    # Imported locally because the surrounding module only imports hashlib and
    # typing; without this the noise branch raised NameError.
    import random

    # Look the rule sets up once instead of once per field.
    fields_to_remove = config.get('remove', [])
    fields_to_hash = config.get('hash', [])
    generalizers = config.get('generalize', {})
    noise_levels = config.get('noise', {})

    result = {}

    for field, value in record.items():
        if field in fields_to_remove:
            continue  # Don't include this field

        if field in fields_to_hash:
            # One-way hash for pseudonymization.
            # NOTE(review): the hash is unsalted and truncated, so low-entropy
            # values (emails, phone numbers) can be recovered by hashing
            # candidate values; consider a keyed hash (HMAC) if that matters.
            result[field] = hashlib.sha256(str(value).encode()).hexdigest()[:16]

        elif field in generalizers:
            # Generalize to reduce granularity
            result[field] = generalizers[field](value)

        elif field in noise_levels:
            # Add zero-mean Gaussian noise for numerical values
            result[field] = value + random.gauss(0, noise_levels[field])

        else:
            result[field] = value

    return result

# Configuration
# Rules consumed by anonymize_record: which fields to drop, hash, generalize
# or perturb with noise.
anonymization_config = {
    'remove': ['ssn', 'full_address'],
    'hash': ['email', 'phone'],
    'generalize': {
        # Bucket an integer age into its decade: 25 -> "20-29"
        'age': lambda x: f"{(x // 10) * 10}-{(x // 10) * 10 + 9}",
        # Keep only the first three digits: 98101 -> "981**".
        # str() lets integer ZIP codes work (slicing an int raised TypeError);
        # zfill(5) restores a leading zero lost when a 5-digit ZIP was stored
        # as an int (2134 -> "021**").
        'zip_code': lambda x: str(x).zfill(5)[:3] + "**",
    },
    'noise': {
        'income': 5000  # Add Gaussian noise with std=5000
    }
}

Content Safety

Input Filtering

import logging

from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions
from azure.core.credentials import AzureKeyCredential

class ContentBlockedException(Exception):
    """Raised when user input is rejected by the content-safety check."""


class ContentModerator:
    """Screen text with Azure AI Content Safety before it is processed."""

    def __init__(self, endpoint: str, key: str, max_allowed_severity: int = 2):
        """Create the Content Safety client.

        Args:
            endpoint: Content Safety resource endpoint.
            key: API key for that resource.
            max_allowed_severity: Highest per-category severity still treated
                as safe. Defaults to 2 (low severity only).
        """
        self.client = ContentSafetyClient(endpoint, AzureKeyCredential(key))
        self.max_allowed_severity = max_allowed_severity

    def is_safe(self, text: str) -> tuple[bool, dict]:
        """Check if content is safe to process.

        Returns:
            ``(is_safe, categories)`` where ``categories`` maps 'hate',
            'self_harm', 'sexual' and 'violence' to the reported severity, and
            ``is_safe`` is True when no severity exceeds the threshold.
        """
        request = AnalyzeTextOptions(text=text)
        response = self.client.analyze_text(request)

        # NOTE(review): the `*_result` attributes match the response shape this
        # snippet was written against; newer SDK releases may expose the
        # per-category results differently - confirm against the installed
        # azure-ai-contentsafety version.
        categories = {
            'hate': response.hate_result.severity,
            'self_harm': response.self_harm_result.severity,
            'sexual': response.sexual_result.severity,
            'violence': response.violence_result.severity
        }

        # Block if any category exceeds the threshold
        max_severity = max(categories.values())
        is_safe = max_severity <= self.max_allowed_severity

        return is_safe, categories

    def filter_request(self, user_input: str) -> str:
        """Return ``user_input`` unchanged if it passes the safety check.

        Raises:
            ContentBlockedException: If any category exceeds the threshold.
                The rejection is logged before raising.
        """
        is_safe, categories = self.is_safe(user_input)

        if not is_safe:
            # Log for review
            self._log_blocked_content(user_input, categories)
            raise ContentBlockedException(
                "Content blocked due to policy violation"
            )

        return user_input

    def _log_blocked_content(self, user_input: str, categories: dict) -> None:
        """Record a blocked request for later review.

        Only the input length and the category severities are logged; the raw
        text is deliberately left out so harmful or personal content is not
        copied into log storage.
        """
        logging.getLogger(__name__).warning(
            "Blocked content (length=%d), severities=%s",
            len(user_input),
            categories,
        )

Governance Framework

AI Registry

from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional

@dataclass
class AIModelRegistration:
    """Registry record describing one AI model and its governance state.

    Field order is part of the constructor signature; append new fields at
    the end to keep positional construction working.
    """

    model_id: str  # returned by AIGovernanceRegistry.register_model
    name: str
    version: str
    purpose: str
    owner: str
    risk_level: str  # low, medium, high, critical
    deployment_status: str
    created_at: datetime
    last_reviewed: datetime  # compared with the current time to find overdue reviews
    review_frequency_days: int  # a review is due once this many days have passed
    fairness_evaluation: dict
    privacy_assessment: dict
    approved_by: List[str]  # presumably approver role names; verify against callers

class ApprovalRequiredException(Exception):
    """Raised when a registration lacks approvals its risk level requires."""


class AIGovernanceRegistry:
    """Registers AI models and tracks which ones are due for review."""

    def __init__(self, storage_client):
        """Keep a storage client that provides ``save(obj)`` and ``get_all()``."""
        self.storage = storage_client

    def register_model(self, registration: AIModelRegistration) -> str:
        """Register a new AI model.

        Returns:
            The registration's ``model_id``.

        Raises:
            ApprovalRequiredException: If an approval required for the
                registration's risk level is missing. Nothing is stored.
            KeyError: If ``risk_level`` is not one of low/medium/high/critical.
        """
        # Validate required approvals based on risk level
        required_approvals = self._get_required_approvals(registration.risk_level)
        if not self._has_required_approvals(registration, required_approvals):
            raise ApprovalRequiredException(
                f"Model requires approval from: {required_approvals}"
            )

        # Store registration
        self.storage.save(registration)

        return registration.model_id

    def get_models_due_for_review(self) -> List[AIModelRegistration]:
        """Get models whose last review is at least their review interval old."""
        # Local import: the module only imports `datetime` from datetime.
        from datetime import timezone

        # datetime.utcnow() is deprecated and returns a naive value; use an
        # aware "now" so aware `last_reviewed` timestamps compare correctly.
        now = datetime.now(timezone.utc)
        due_for_review = []

        for model in self.storage.get_all():
            last_reviewed = model.last_reviewed
            if last_reviewed.tzinfo is None:
                # Naive timestamps are interpreted as UTC, matching the
                # previous utcnow()-based comparison.
                last_reviewed = last_reviewed.replace(tzinfo=timezone.utc)

            days_since_review = (now - last_reviewed).days
            if days_since_review >= model.review_frequency_days:
                due_for_review.append(model)

        return due_for_review

    def _get_required_approvals(self, risk_level: str) -> List[str]:
        """Return the approver roles required for ``risk_level``.

        Raises KeyError for an unrecognised risk level.
        """
        return {
            'low': ['team_lead'],
            'medium': ['team_lead', 'data_governance'],
            'high': ['team_lead', 'data_governance', 'legal'],
            'critical': ['team_lead', 'data_governance', 'legal', 'executive']
        }[risk_level]

    def _has_required_approvals(
        self,
        registration: AIModelRegistration,
        required_approvals: List[str],
    ) -> bool:
        """Return True when every required role appears in ``approved_by``.

        Assumes ``approved_by`` holds the same role identifiers that
        ``_get_required_approvals`` returns - confirm against callers.
        """
        granted = set(registration.approved_by)
        return all(role in granted for role in required_approvals)

Practical Checklist

Before deploying an AI system:

  • Fairness evaluation completed across protected groups
  • Model card documented with intended use and limitations
  • Privacy impact assessment completed
  • Content safety measures implemented
  • Explainability mechanism in place
  • Human oversight process defined
  • Monitoring and alerting configured
  • Incident response plan documented
  • Regular review schedule established
  • User feedback mechanism available

Conclusion

Responsible AI isn’t optional - it’s essential. As we build more powerful AI systems, we must ensure they’re fair, transparent, safe, and accountable. The frameworks and tools exist; we need to commit to using them.

Resources

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.