AI Transparency: Building Understandable AI Systems
Transparency is fundamental to trustworthy AI. Users, regulators, and stakeholders need to understand how AI systems work and make decisions.
Transparency Framework
from dataclasses import dataclass
from typing import List, Dict, Optional
from enum import Enum

class TransparencyLevel(Enum):
    MINIMAL = "Minimal - AI involvement disclosed"
    BASIC = "Basic - General explanation of process"
    DETAILED = "Detailed - Feature-level explanations"
    FULL = "Full - Complete model and data transparency"

class StakeholderType(Enum):
    END_USER = "End User"
    AFFECTED_PERSON = "Affected Person"
    OPERATOR = "Business Operator"
    DEVELOPER = "Developer/Data Scientist"
    AUDITOR = "Auditor/Regulator"

@dataclass
class TransparencyRequirement:
    stakeholder: StakeholderType
    required_level: TransparencyLevel
    information_needed: List[str]
    format: str
    timing: str

transparency_requirements = {
    StakeholderType.END_USER: TransparencyRequirement(
        stakeholder=StakeholderType.END_USER,
        required_level=TransparencyLevel.MINIMAL,
        information_needed=[
            "Disclosure that AI is being used",
            "What the AI does",
            "How to get human assistance"
        ],
        format="Clear, non-technical notification",
        timing="Before or during interaction"
    ),
    StakeholderType.AFFECTED_PERSON: TransparencyRequirement(
        stakeholder=StakeholderType.AFFECTED_PERSON,
        required_level=TransparencyLevel.DETAILED,
        information_needed=[
            "What decision was made",
            "Key factors in the decision",
            "How to contest the decision",
            "Data used in the decision"
        ],
        format="Personalized explanation document",
        timing="Upon request, within 30 days"
    ),
    StakeholderType.OPERATOR: TransparencyRequirement(
        stakeholder=StakeholderType.OPERATOR,
        required_level=TransparencyLevel.DETAILED,
        information_needed=[
            "Model capabilities and limitations",
            "Appropriate use cases",
            "Performance metrics",
            "Monitoring requirements"
        ],
        format="Technical documentation and training",
        timing="Before deployment and ongoing"
    ),
    StakeholderType.AUDITOR: TransparencyRequirement(
        stakeholder=StakeholderType.AUDITOR,
        required_level=TransparencyLevel.FULL,
        information_needed=[
            "Complete model documentation",
            "Training data description",
            "Validation results",
            "Fairness assessments",
            "Incident history"
        ],
        format="Comprehensive audit package",
        timing="On demand"
    )
}
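To make the mapping concrete, here is a minimal usage sketch. The lookup and print formatting are purely illustrative, not part of any framework API:

# Illustrative lookup: what must be disclosed to an end user, and how?
req = transparency_requirements[StakeholderType.END_USER]

print(f"Required level: {req.required_level.value}")
print(f"Format: {req.format} ({req.timing})")
for item in req.information_needed:
    print(f"- {item}")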
Explainability Implementation
import numpy as np
from typing import Any, Dict, List

class ExplainabilityEngine:
    """Generate explanations for AI model predictions."""

    def __init__(self, model: Any, feature_names: List[str]):
        self.model = model
        self.feature_names = feature_names

    def local_explanation_lime(
        self,
        instance: np.ndarray,
        num_features: int = 5
    ) -> Dict:
        """Generate a LIME-style local explanation.

        Simplified illustration - in production, use the actual LIME library.
        """
        # Perturb the instance and get predictions for the neighborhood
        perturbations = self._generate_perturbations(instance, n=100)
        predictions = self._get_predictions(perturbations)
        weights = self._calculate_weights(instance, perturbations)

        # Fit a weighted linear surrogate model around the instance
        coefficients = self._fit_weighted_linear(perturbations, predictions, weights)

        # Rank features by the magnitude of their local contribution
        feature_importance = list(zip(self.feature_names, coefficients))
        feature_importance.sort(key=lambda x: abs(x[1]), reverse=True)

        return {
            "instance": instance.tolist(),
            "prediction": self._get_predictions(np.array([instance]))[0],
            "top_features": [
                {"feature": f, "contribution": c}
                for f, c in feature_importance[:num_features]
            ],
            "explanation_type": "LIME"
        }

    def _generate_perturbations(self, instance: np.ndarray, n: int) -> np.ndarray:
        """Generate perturbed copies of the instance with Gaussian noise."""
        noise = np.random.normal(0, 0.1, (n, len(instance)))
        return instance + noise

    def _get_predictions(self, instances: np.ndarray) -> np.ndarray:
        """Get model predictions. Placeholder - would call the actual model."""
        return np.random.random(len(instances))

    def _calculate_weights(self, instance: np.ndarray, perturbations: np.ndarray) -> np.ndarray:
        """Weight perturbations by their proximity to the original instance."""
        distances = np.sqrt(np.sum((perturbations - instance) ** 2, axis=1))
        return np.exp(-distances)

    def _fit_weighted_linear(
        self,
        X: np.ndarray,
        y: np.ndarray,
        weights: np.ndarray
    ) -> np.ndarray:
        """Fit weighted linear regression (weighted least squares via numpy)."""
        sqrt_w = np.sqrt(weights)[:, np.newaxis]
        coefficients, *_ = np.linalg.lstsq(X * sqrt_w, y * sqrt_w.ravel(), rcond=None)
        return coefficients

    def global_explanation_shap(self, dataset: np.ndarray) -> Dict:
        """Generate a SHAP-style global explanation (simplified placeholder)."""
        feature_importance = np.random.random(len(self.feature_names))
        feature_importance = feature_importance / feature_importance.sum()

        return {
            "feature_importance": [
                {"feature": f, "importance": i}
                for f, i in zip(self.feature_names, feature_importance)
            ],
            "explanation_type": "SHAP Global"
        }

    def counterfactual_explanation(
        self,
        instance: np.ndarray,
        desired_outcome: int
    ) -> Dict:
        """Generate a counterfactual explanation.

        Answers: "What would need to change for a different outcome?"
        """
        current_pred = self._get_predictions(np.array([instance]))[0]

        # Find minimal changes (simplified - a real implementation would
        # search for them with an optimization method)
        changes = []
        for feature, value in zip(self.feature_names, instance):
            new_value = value * 1.1  # 10% increase
            changes.append({
                "feature": feature,
                "current_value": value,
                "suggested_value": new_value,
                "change_direction": "increase"
            })

        return {
            "current_outcome": current_pred,
            "desired_outcome": desired_outcome,
            "suggested_changes": changes[:3],  # Top 3 changes
            "explanation_type": "Counterfactual"
        }
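Here is a quick, hypothetical usage sketch of the engine above. The feature names and applicant values are invented, and the model argument can be None here because the simplified methods never call it:

# Hypothetical example: explaining a single (made-up) loan application
engine = ExplainabilityEngine(
    model=None,
    feature_names=["income", "credit_history_length", "debt_ratio", "age"]
)

applicant = np.array([52000.0, 7.0, 0.35, 41.0])
local = engine.local_explanation_lime(applicant, num_features=3)
counterfactual = engine.counterfactual_explanation(applicant, desired_outcome=1)

print(local["top_features"])
print(counterfactual["suggested_changes"])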
User-Facing Transparency
class TransparencyInterface:
    """Generate user-facing transparency content."""

    def __init__(self):
        self.templates = {}

    def generate_ai_disclosure(
        self,
        system_name: str,
        purpose: str,
        interaction_type: str
    ) -> str:
        """Generate an AI disclosure notice."""
        return f"""
**AI-Powered Assistance**

This {interaction_type} uses artificial intelligence ({system_name}) to {purpose}.

**What this means:**
- An AI system will process your input
- Responses are generated by AI, not a human
- The AI aims to be helpful but may make mistakes

**Your options:**
- You can request human assistance at any time
- You can ask for an explanation of any AI-generated response
- Your feedback helps us improve

*For questions about how your data is used, see our Privacy Policy.*
"""

    def generate_decision_explanation(
        self,
        decision: str,
        confidence: float,
        factors: List[Dict],
        alternatives: List[str]
    ) -> str:
        """Generate an explanation for an AI decision."""
        explanation = f"""
## Decision Summary

**Outcome:** {decision}
**Confidence Level:** {confidence * 100:.0f}%

### Key Factors
The following factors most influenced this decision:
"""
        for i, factor in enumerate(factors[:5], 1):
            direction = "+" if factor.get("contribution", 0) > 0 else "-"
            explanation += f"{i}. **{factor['name']}**: {factor['description']} ({direction})\n"

        explanation += """
### What This Means
"""
        if confidence > 0.8:
            explanation += "This decision has high confidence based on the available data.\n"
        else:
            explanation += "This decision has moderate confidence. Additional review may be helpful.\n"

        explanation += """
### Your Rights
- **Contest this decision:** [Link to appeal process]
- **Request human review:** [Link to request form]
- **Access your data:** [Link to data request]
"""
        return explanation

    def generate_system_documentation(
        self,
        system_info: Dict
    ) -> str:
        """Generate public system documentation."""
        return f"""
# About {system_info['name']}

## What It Does
{system_info['description']}

## How It Works
{system_info['methodology']}

## Data Used
{system_info['data_description']}

**We DO collect:**
{chr(10).join(f'- {item}' for item in system_info.get('data_collected', []))}

**We DO NOT collect:**
{chr(10).join(f'- {item}' for item in system_info.get('data_not_collected', []))}

## Limitations
{chr(10).join(f'- {item}' for item in system_info.get('limitations', []))}

## Questions?
Contact us at: {system_info.get('contact', 'ai-transparency@company.com')}

*Last updated: {system_info.get('last_updated', 'N/A')}*
"""
Audit Trail Implementation
from datetime import datetime
import json
import hashlib

class TransparencyAuditLog:
    """Maintain an audit trail for AI system transparency."""

    def __init__(self, system_id: str):
        self.system_id = system_id
        self.entries: List[Dict] = []

    def log_decision(
        self,
        decision_id: str,
        input_data: Dict,
        output: Dict,
        model_version: str,
        explanation: Optional[Dict]
    ) -> Dict:
        """Log a decision with full context."""
        entry = {
            "decision_id": decision_id,
            "timestamp": datetime.now().isoformat(),
            "system_id": self.system_id,
            "model_version": model_version,
            "input_hash": self._hash_data(input_data),
            "output": output,
            "explanation_available": explanation is not None,
            "explanation_id": self._store_explanation(explanation) if explanation else None
        }
        self.entries.append(entry)
        return entry

    def _hash_data(self, data: Dict) -> str:
        """Create a hash of the input data for traceability."""
        return hashlib.sha256(json.dumps(data, sort_keys=True).encode()).hexdigest()[:16]

    def _store_explanation(self, explanation: Dict) -> str:
        """Store the explanation and return a reference ID."""
        # In production, store in a database
        return f"exp_{datetime.now().strftime('%Y%m%d%H%M%S')}"

    def retrieve_decision_context(self, decision_id: str) -> Optional[Dict]:
        """Retrieve the full logged context for a decision."""
        for entry in self.entries:
            if entry["decision_id"] == decision_id:
                return entry
        return None

    def generate_audit_report(self, start_date: datetime, end_date: datetime) -> Dict:
        """Generate an audit report for a time period."""
        # ISO-8601 timestamps sort lexicographically, so string comparison works
        relevant = [
            e for e in self.entries
            if start_date.isoformat() <= e["timestamp"] <= end_date.isoformat()
        ]
        return {
            "system_id": self.system_id,
            "period": {"start": start_date.isoformat(), "end": end_date.isoformat()},
            "total_decisions": len(relevant),
            "explanations_generated": sum(1 for e in relevant if e["explanation_available"]),
            "model_versions_used": list(set(e["model_version"] for e in relevant))
        }
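Finally, a small end-to-end sketch of the audit log. The system ID, decision ID, model version, and payloads are invented for illustration:

# Hypothetical example: log one decision, then summarize the last day
from datetime import timedelta

audit_log = TransparencyAuditLog(system_id="loan-screening-v2")

audit_log.log_decision(
    decision_id="dec-001",
    input_data={"income": 52000, "debt_ratio": 0.35},
    output={"decision": "refer_to_human", "score": 0.72},
    model_version="2.3.1",
    explanation={"explanation_type": "LIME", "top_features": ["debt_ratio"]}
)

report = audit_log.generate_audit_report(
    start_date=datetime.now() - timedelta(days=1),
    end_date=datetime.now() + timedelta(minutes=1)
)
print(report)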
Tomorrow, we’ll explore explainability requirements in depth!