AI Transparency: Building Understandable AI Systems
Transparency is fundamental to trustworthy AI. Users, regulators, and stakeholders need to understand how AI systems work and make decisions.
Transparency Framework
from dataclasses import dataclass
from typing import List, Dict, Optional
from enum import Enum

class TransparencyLevel(Enum):
    MINIMAL = "Minimal - AI involvement disclosed"
    BASIC = "Basic - General explanation of process"
    DETAILED = "Detailed - Feature-level explanations"
    FULL = "Full - Complete model and data transparency"

class StakeholderType(Enum):
    END_USER = "End User"
    AFFECTED_PERSON = "Affected Person"
    OPERATOR = "Business Operator"
    DEVELOPER = "Developer/Data Scientist"
    AUDITOR = "Auditor/Regulator"

@dataclass
class TransparencyRequirement:
    stakeholder: StakeholderType
    required_level: TransparencyLevel
    information_needed: List[str]
    format: str
    timing: str

transparency_requirements = {
    StakeholderType.END_USER: TransparencyRequirement(
        stakeholder=StakeholderType.END_USER,
        required_level=TransparencyLevel.MINIMAL,
        information_needed=[
            "Disclosure that AI is being used",
            "What the AI does",
            "How to get human assistance"
        ],
        format="Clear, non-technical notification",
        timing="Before or during interaction"
    ),
    StakeholderType.AFFECTED_PERSON: TransparencyRequirement(
        stakeholder=StakeholderType.AFFECTED_PERSON,
        required_level=TransparencyLevel.DETAILED,
        information_needed=[
            "What decision was made",
            "Key factors in the decision",
            "How to contest the decision",
            "Data used in the decision"
        ],
        format="Personalized explanation document",
        timing="Upon request, within 30 days"
    ),
    StakeholderType.OPERATOR: TransparencyRequirement(
        stakeholder=StakeholderType.OPERATOR,
        required_level=TransparencyLevel.DETAILED,
        information_needed=[
            "Model capabilities and limitations",
            "Appropriate use cases",
            "Performance metrics",
            "Monitoring requirements"
        ],
        format="Technical documentation and training",
        timing="Before deployment and ongoing"
    ),
    StakeholderType.AUDITOR: TransparencyRequirement(
        stakeholder=StakeholderType.AUDITOR,
        required_level=TransparencyLevel.FULL,
        information_needed=[
            "Complete model documentation",
            "Training data description",
            "Validation results",
            "Fairness assessments",
            "Incident history"
        ],
        format="Comprehensive audit package",
        timing="On demand"
    )
}
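To make the mapping concrete, here is a minimal usage sketch. The lookup and print formatting are purely illustrative, not part of any framework API:

# Illustrative lookup: what must be disclosed to an end user, and how?
req = transparency_requirements[StakeholderType.END_USER]

print(f"Required level: {req.required_level.value}")
print(f"Format: {req.format} ({req.timing})")
for item in req.information_needed:
    print(f"- {item}")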
Explainability Implementation
import numpy as np
from typing import Any, Dict, List

class ExplainabilityEngine:
    """Generate explanations for AI model predictions."""

    def __init__(self, model: Any, feature_names: List[str]):
        self.model = model
        self.feature_names = feature_names

    def local_explanation_lime(
        self,
        instance: np.ndarray,
        num_features: int = 5
    ) -> Dict:
        """Generate a LIME-style local explanation.

        Simplified illustration - in production, use the actual LIME library.
        """
        # Perturb the instance and get predictions for the neighborhood
        perturbations = self._generate_perturbations(instance, n=100)
        predictions = self._get_predictions(perturbations)
        weights = self._calculate_weights(instance, perturbations)

        # Fit a weighted linear surrogate model around the instance
        coefficients = self._fit_weighted_linear(perturbations, predictions, weights)

        # Rank features by the magnitude of their local contribution
        feature_importance = list(zip(self.feature_names, coefficients))
        feature_importance.sort(key=lambda x: abs(x[1]), reverse=True)

        return {
            "instance": instance.tolist(),
            "prediction": self._get_predictions(np.array([instance]))[0],
            "top_features": [
                {"feature": f, "contribution": c}
                for f, c in feature_importance[:num_features]
            ],
            "explanation_type": "LIME"
        }

    def _generate_perturbations(self, instance: np.ndarray, n: int) -> np.ndarray:
        """Generate perturbed copies of the instance with Gaussian noise."""
        noise = np.random.normal(0, 0.1, (n, len(instance)))
        return instance + noise

    def _get_predictions(self, instances: np.ndarray) -> np.ndarray:
        """Get model predictions. Placeholder - would call the actual model."""
        return np.random.random(len(instances))

    def _calculate_weights(self, instance: np.ndarray, perturbations: np.ndarray) -> np.ndarray:
        """Weight perturbations by their proximity to the original instance."""
        distances = np.sqrt(np.sum((perturbations - instance) ** 2, axis=1))
        return np.exp(-distances)

    def _fit_weighted_linear(
        self,
        X: np.ndarray,
        y: np.ndarray,
        weights: np.ndarray
    ) -> np.ndarray:
        """Fit weighted linear regression (weighted least squares via numpy)."""
        sqrt_w = np.sqrt(weights)[:, np.newaxis]
        coefficients, *_ = np.linalg.lstsq(X * sqrt_w, y * sqrt_w.ravel(), rcond=None)
        return coefficients

    def global_explanation_shap(self, dataset: np.ndarray) -> Dict:
        """Generate a SHAP-style global explanation (simplified placeholder)."""
        feature_importance = np.random.random(len(self.feature_names))
        feature_importance = feature_importance / feature_importance.sum()

        return {
            "feature_importance": [
                {"feature": f, "importance": i}
                for f, i in zip(self.feature_names, feature_importance)
            ],
            "explanation_type": "SHAP Global"
        }

    def counterfactual_explanation(
        self,
        instance: np.ndarray,
        desired_outcome: int
    ) -> Dict:
        """Generate a counterfactual explanation.

        Answers: "What would need to change for a different outcome?"
        """
        current_pred = self._get_predictions(np.array([instance]))[0]

        # Find minimal changes (simplified - a real implementation would
        # search for them with an optimization method)
        changes = []
        for feature, value in zip(self.feature_names, instance):
            new_value = value * 1.1  # 10% increase
            changes.append({
                "feature": feature,
                "current_value": value,
                "suggested_value": new_value,
                "change_direction": "increase"
            })

        return {
            "current_outcome": current_pred,
            "desired_outcome": desired_outcome,
            "suggested_changes": changes[:3],  # Top 3 changes
            "explanation_type": "Counterfactual"
        }
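Here is a quick, hypothetical usage sketch of the engine above. The feature names and applicant values are invented, and the model argument can be None here because the simplified methods never call it:

# Hypothetical example: explaining a single (made-up) loan application
engine = ExplainabilityEngine(
    model=None,
    feature_names=["income", "credit_history_length", "debt_ratio", "age"]
)

applicant = np.array([52000.0, 7.0, 0.35, 41.0])
local = engine.local_explanation_lime(applicant, num_features=3)
counterfactual = engine.counterfactual_explanation(applicant, desired_outcome=1)

print(local["top_features"])
print(counterfactual["suggested_changes"])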
User-Facing Transparency
class TransparencyInterface:
    """Generate user-facing transparency content."""

    def __init__(self):
        self.templates = {}

    def generate_ai_disclosure(
        self,
        system_name: str,
        purpose: str,
        interaction_type: str
    ) -> str:
        """Generate an AI disclosure notice."""
        return f"""
**AI-Powered Assistance**

This {interaction_type} uses artificial intelligence ({system_name}) to {purpose}.

**What this means:**
- An AI system will process your input
- Responses are generated by AI, not a human
- The AI aims to be helpful but may make mistakes

**Your options:**
- You can request human assistance at any time
- You can ask for an explanation of any AI-generated response
- Your feedback helps us improve

*For questions about how your data is used, see our Privacy Policy.*
"""

    def generate_decision_explanation(
        self,
        decision: str,
        confidence: float,
        factors: List[Dict],
        alternatives: List[str]
    ) -> str:
        """Generate an explanation for an AI decision."""
        explanation = f"""
## Decision Summary

**Outcome:** {decision}
**Confidence Level:** {confidence * 100:.0f}%

### Key Factors
The following factors most influenced this decision:
"""
        for i, factor in enumerate(factors[:5], 1):
            direction = "+" if factor.get("contribution", 0) > 0 else "-"
            explanation += f"{i}. **{factor['name']}**: {factor['description']} ({direction})\n"

        explanation += """
### What This Means
"""
        if confidence > 0.8:
            explanation += "This decision has high confidence based on the available data.\n"
        else:
            explanation += "This decision has moderate confidence. Additional review may be helpful.\n"

        explanation += """
### Your Rights
- **Contest this decision:** [Link to appeal process]
- **Request human review:** [Link to request form]
- **Access your data:** [Link to data request]
"""
        return explanation

    def generate_system_documentation(
        self,
        system_info: Dict
    ) -> str:
        """Generate public system documentation."""
        return f"""
# About {system_info['name']}

## What It Does
{system_info['description']}

## How It Works
{system_info['methodology']}

## Data Used
{system_info['data_description']}

**We DO collect:**
{chr(10).join(f'- {item}' for item in system_info.get('data_collected', []))}

**We DO NOT collect:**
{chr(10).join(f'- {item}' for item in system_info.get('data_not_collected', []))}

## Limitations
{chr(10).join(f'- {item}' for item in system_info.get('limitations', []))}

## Questions?
Contact us at: {system_info.get('contact', 'ai-transparency@company.com')}

*Last updated: {system_info.get('last_updated', 'N/A')}*
"""
Audit Trail Implementation
from datetime import datetime
import json
import hashlib

class TransparencyAuditLog:
    """Maintain an audit trail for AI system transparency."""

    def __init__(self, system_id: str):
        self.system_id = system_id
        self.entries: List[Dict] = []

    def log_decision(
        self,
        decision_id: str,
        input_data: Dict,
        output: Dict,
        model_version: str,
        explanation: Optional[Dict]
    ) -> Dict:
        """Log a decision with full context."""
        entry = {
            "decision_id": decision_id,
            "timestamp": datetime.now().isoformat(),
            "system_id": self.system_id,
            "model_version": model_version,
            "input_hash": self._hash_data(input_data),
            "output": output,
            "explanation_available": explanation is not None,
            "explanation_id": self._store_explanation(explanation) if explanation else None
        }
        self.entries.append(entry)
        return entry

    def _hash_data(self, data: Dict) -> str:
        """Create a hash of the input data for traceability."""
        return hashlib.sha256(json.dumps(data, sort_keys=True).encode()).hexdigest()[:16]

    def _store_explanation(self, explanation: Dict) -> str:
        """Store the explanation and return a reference ID."""
        # In production, store in a database
        return f"exp_{datetime.now().strftime('%Y%m%d%H%M%S')}"

    def retrieve_decision_context(self, decision_id: str) -> Optional[Dict]:
        """Retrieve the full logged context for a decision."""
        for entry in self.entries:
            if entry["decision_id"] == decision_id:
                return entry
        return None

    def generate_audit_report(self, start_date: datetime, end_date: datetime) -> Dict:
        """Generate an audit report for a time period."""
        # ISO-8601 timestamps sort lexicographically, so string comparison works
        relevant = [
            e for e in self.entries
            if start_date.isoformat() <= e["timestamp"] <= end_date.isoformat()
        ]
        return {
            "system_id": self.system_id,
            "period": {"start": start_date.isoformat(), "end": end_date.isoformat()},
            "total_decisions": len(relevant),
            "explanations_generated": sum(1 for e in relevant if e["explanation_available"]),
            "model_versions_used": list(set(e["model_version"] for e in relevant))
        }
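Finally, a small end-to-end sketch of the audit log. The system ID, decision ID, model version, and payloads are invented for illustration:

# Hypothetical example: log one decision, then summarize the last day
from datetime import timedelta

audit_log = TransparencyAuditLog(system_id="loan-screening-v2")

audit_log.log_decision(
    decision_id="dec-001",
    input_data={"income": 52000, "debt_ratio": 0.35},
    output={"decision": "refer_to_human", "score": 0.72},
    model_version="2.3.1",
    explanation={"explanation_type": "LIME", "top_features": ["debt_ratio"]}
)

report = audit_log.generate_audit_report(
    start_date=datetime.now() - timedelta(days=1),
    end_date=datetime.now() + timedelta(minutes=1)
)
print(report)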
Tomorrow, we’ll explore explainability requirements in depth!