Responsible AI with Azure OpenAI: Building Ethical AI Systems
With great power comes great responsibility. Azure OpenAI Service provides powerful AI capabilities, but deploying them responsibly requires careful consideration of ethics, safety, and societal impact. Let’s explore how to build responsible AI systems.
Microsoft’s Responsible AI Principles
Microsoft’s Responsible AI framework guides Azure OpenAI Service:
- Fairness: AI systems should treat all people fairly
- Reliability & Safety: AI systems should perform reliably and safely
- Privacy & Security: AI systems should be secure and respect privacy
- Inclusiveness: AI systems should empower everyone
- Transparency: AI systems should be understandable
- Accountability: People should be accountable for AI systems
Implementing Responsible AI
Here’s a framework for implementing these principles:
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Dict, Any
from enum import Enum
import logging
class RiskLevel(Enum):
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
CRITICAL = "critical"
@dataclass
class ResponsibleAIConfig:
"""Configuration for responsible AI deployment."""
# Content filtering
enable_content_filtering: bool = True
content_filter_level: str = "medium" # low, medium, high
# Transparency
disclose_ai_usage: bool = True
log_all_interactions: bool = True
# Human oversight
require_human_review_threshold: RiskLevel = RiskLevel.HIGH
escalation_contact: str = ""
# Fairness
bias_monitoring_enabled: bool = True
demographic_parity_threshold: float = 0.8
# Rate limiting for safety
max_requests_per_user_per_hour: int = 100
class ResponsibleAIGuard:
"""Guard rails for responsible AI deployment."""
def __init__(self, config: ResponsibleAIConfig):
self.config = config
self.logger = logging.getLogger("responsible_ai")
def pre_request_check(
self,
prompt: str,
user_id: str,
context: Dict[str, Any]
) -> tuple[bool, Optional[str]]:
"""
Check if request should proceed.
Returns (should_proceed, reason_if_blocked)
"""
# Check rate limits
if not self._check_rate_limit(user_id):
return False, "Rate limit exceeded"
# Check for harmful content
if self.config.enable_content_filtering:
is_safe, category = self._check_content_safety(prompt)
if not is_safe:
self.logger.warning(
f"Blocked request from {user_id}: {category}"
)
return False, f"Content blocked: {category}"
# Log interaction for transparency
if self.config.log_all_interactions:
self._log_interaction(user_id, prompt, "request")
return True, None
def post_response_check(
self,
response: str,
user_id: str,
context: Dict[str, Any]
) -> tuple[str, bool]:
"""
Check response and potentially modify or flag.
Returns (processed_response, needs_human_review)
"""
needs_review = False
processed = response
# Check response safety
if self.config.enable_content_filtering:
is_safe, category = self._check_content_safety(response)
if not is_safe:
processed = self._get_safe_fallback_response()
self.logger.warning(
f"Response filtered for {user_id}: {category}"
)
# Assess risk level
risk_level = self._assess_risk_level(response, context)
        # RiskLevel values are strings, so compare by explicit severity order rather than string order
        risk_order = [RiskLevel.LOW, RiskLevel.MEDIUM, RiskLevel.HIGH, RiskLevel.CRITICAL]
        if risk_order.index(risk_level) >= risk_order.index(self.config.require_human_review_threshold):
needs_review = True
self.logger.info(
f"Flagged for human review: risk={risk_level.value}"
)
# Add AI disclosure if configured
if self.config.disclose_ai_usage:
processed = self._add_ai_disclosure(processed)
# Log response
if self.config.log_all_interactions:
self._log_interaction(user_id, processed, "response")
return processed, needs_review
def _check_content_safety(self, text: str) -> tuple[bool, Optional[str]]:
"""Check text for harmful content categories."""
# In production, use Azure Content Safety API
harmful_patterns = {
"hate_speech": ["hate", "discriminate"],
"violence": ["kill", "attack", "weapon"],
"self_harm": ["suicide", "self-harm"],
"sexual": ["explicit content patterns"]
}
text_lower = text.lower()
for category, patterns in harmful_patterns.items():
for pattern in patterns:
if pattern in text_lower:
return False, category
return True, None
def _assess_risk_level(
self,
response: str,
context: Dict[str, Any]
) -> RiskLevel:
"""Assess risk level of response."""
# Risk indicators
high_risk_indicators = [
"medical advice",
"legal advice",
"financial recommendation",
"personal data"
]
response_lower = response.lower()
for indicator in high_risk_indicators:
if indicator in response_lower:
return RiskLevel.HIGH
if len(response) > 2000:
return RiskLevel.MEDIUM
return RiskLevel.LOW
def _check_rate_limit(self, user_id: str) -> bool:
"""Check if user is within rate limits."""
# Implement with Redis or similar
return True
def _get_safe_fallback_response(self) -> str:
"""Return safe fallback when content is filtered."""
return "I'm not able to provide that information. Please rephrase your question or contact support for assistance."
def _add_ai_disclosure(self, response: str) -> str:
"""Add AI-generated content disclosure."""
return f"{response}\n\n---\n*This response was generated by AI and may contain errors. Please verify important information.*"
def _log_interaction(
self,
user_id: str,
content: str,
interaction_type: str
):
"""Log interaction for audit and analysis."""
        self.logger.info(
            "Interaction logged",
extra={
"user_id": user_id,
"type": interaction_type,
"content_length": len(content),
"timestamp": datetime.utcnow().isoformat()
}
)
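The _check_content_safety method above uses simple keyword matching as a stand-in. In production, the Azure AI Content Safety service is the better fit; here is a minimal sketch, assuming the azure-ai-contentsafety 1.x Python SDK and placeholder endpoint/key values, that returns the same (is_safe, category) tuple:
from typing import Optional

from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions
from azure.core.credentials import AzureKeyCredential

def check_content_safety_azure(
    text: str,
    endpoint: str,  # placeholder, e.g. "https://<your-resource>.cognitiveservices.azure.com"
    key: str,
    severity_threshold: int = 2,  # assumption: treat severity 2 or higher as unsafe
) -> tuple[bool, Optional[str]]:
    """Sketch: classify text with Azure AI Content Safety instead of keyword lists."""
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))
    result = client.analyze_text(AnalyzeTextOptions(text=text))
    # Each analyzed category (Hate, SelfHarm, Sexual, Violence) carries a severity score.
    for item in result.categories_analysis:
        if item.severity is not None and item.severity >= severity_threshold:
            return False, str(item.category)
    return True, None
The severity cutoff is an assumption, not a recommendation; tune it per category to match your content_filter_level setting.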
Bias Detection and Mitigation
Monitor for bias in AI outputs:
from typing import Any, Dict, List
import numpy as np
class BiasMonitor:
"""Monitor AI outputs for potential bias."""
def __init__(self):
self.demographic_groups = [
"male", "female", "non-binary",
"young", "old",
"asian", "black", "white", "hispanic"
]
self.response_history: Dict[str, List[str]] = {
group: [] for group in self.demographic_groups
}
def analyze_response(
self,
response: str,
context: Dict[str, Any]
) -> Dict[str, Any]:
"""Analyze response for potential bias indicators."""
analysis = {
"sentiment_scores": self._analyze_sentiment(response),
"length": len(response),
"formality_score": self._analyze_formality(response),
"detected_demographics": self._detect_demographic_references(response)
}
return analysis
    def calculate_demographic_parity(
        self,
        metric: str = "sentiment"
    ) -> Dict[str, Any]:
"""
Calculate demographic parity across groups.
Returns parity scores (1.0 = perfect parity)
"""
if metric == "sentiment":
scores = {}
for group, responses in self.response_history.items():
if responses:
sentiments = [
self._analyze_sentiment(r)["positive"]
for r in responses
]
scores[group] = np.mean(sentiments)
if not scores:
return {}
max_score = max(scores.values())
min_score = min(scores.values())
# Parity ratio
parity = min_score / max_score if max_score > 0 else 1.0
return {
"parity_ratio": parity,
"group_scores": scores,
"is_fair": parity >= 0.8 # 80% rule
}
return {}
def _analyze_sentiment(self, text: str) -> Dict[str, float]:
"""Basic sentiment analysis."""
# In production, use Azure Cognitive Services
positive_words = ["good", "great", "excellent", "happy", "wonderful"]
negative_words = ["bad", "poor", "terrible", "sad", "awful"]
text_lower = text.lower()
words = text_lower.split()
positive_count = sum(1 for w in words if w in positive_words)
negative_count = sum(1 for w in words if w in negative_words)
total = positive_count + negative_count
return {
"positive": positive_count / total if total > 0 else 0.5,
"negative": negative_count / total if total > 0 else 0.5
}
def _analyze_formality(self, text: str) -> float:
"""Analyze text formality (0=informal, 1=formal)."""
formal_indicators = ["therefore", "however", "consequently", "regarding"]
informal_indicators = ["gonna", "wanna", "hey", "cool", "awesome"]
text_lower = text.lower()
formal_count = sum(1 for i in formal_indicators if i in text_lower)
informal_count = sum(1 for i in informal_indicators if i in text_lower)
total = formal_count + informal_count
return formal_count / total if total > 0 else 0.5
def _detect_demographic_references(self, text: str) -> List[str]:
"""Detect demographic group references in text."""
detected = []
text_lower = text.lower()
for group in self.demographic_groups:
if group in text_lower:
detected.append(group)
return detected
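The _analyze_sentiment helper above is a word-count placeholder. A production version would typically call Azure AI Language; here is a minimal sketch, assuming the azure-ai-textanalytics 5.x SDK and placeholder credentials, that returns the same score dictionary:
from typing import Dict

from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

def analyze_sentiment_azure(text: str, endpoint: str, key: str) -> Dict[str, float]:
    """Sketch: score sentiment with Azure AI Language instead of word lists."""
    client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
    doc = client.analyze_sentiment(documents=[text])[0]
    if doc.is_error:
        # Fall back to a neutral score if the service cannot analyze the text.
        return {"positive": 0.5, "negative": 0.5}
    return {
        "positive": doc.confidence_scores.positive,
        "negative": doc.confidence_scores.negative,
    }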
Human-in-the-Loop Implementation
For high-stakes decisions, include human oversight:
from enum import Enum
import uuid
from datetime import datetime
class ReviewStatus(Enum):
PENDING = "pending"
APPROVED = "approved"
REJECTED = "rejected"
MODIFIED = "modified"
@dataclass
class HumanReviewRequest:
id: str
original_response: str
context: Dict[str, Any]
risk_level: RiskLevel
created_at: datetime
status: ReviewStatus = ReviewStatus.PENDING
reviewer: Optional[str] = None
reviewed_at: Optional[datetime] = None
final_response: Optional[str] = None
notes: Optional[str] = None
class HumanReviewQueue:
"""Queue for human review of AI responses."""
def __init__(self):
self.pending_reviews: Dict[str, HumanReviewRequest] = {}
self.completed_reviews: List[HumanReviewRequest] = []
def submit_for_review(
self,
response: str,
context: Dict[str, Any],
risk_level: RiskLevel
) -> str:
"""Submit response for human review."""
review_id = str(uuid.uuid4())
review = HumanReviewRequest(
id=review_id,
original_response=response,
context=context,
risk_level=risk_level,
created_at=datetime.utcnow()
)
self.pending_reviews[review_id] = review
# In production, send notification to reviewers
self._notify_reviewers(review)
return review_id
def complete_review(
self,
review_id: str,
status: ReviewStatus,
reviewer: str,
final_response: Optional[str] = None,
notes: Optional[str] = None
):
"""Complete a human review."""
if review_id not in self.pending_reviews:
raise ValueError(f"Review {review_id} not found")
review = self.pending_reviews.pop(review_id)
review.status = status
review.reviewer = reviewer
review.reviewed_at = datetime.utcnow()
review.final_response = final_response or review.original_response
review.notes = notes
self.completed_reviews.append(review)
return review
def get_pending_reviews(
self,
risk_level: Optional[RiskLevel] = None
) -> List[HumanReviewRequest]:
"""Get pending reviews, optionally filtered by risk level."""
reviews = list(self.pending_reviews.values())
if risk_level:
reviews = [r for r in reviews if r.risk_level == risk_level]
return sorted(reviews, key=lambda r: r.created_at)
def _notify_reviewers(self, review: HumanReviewRequest):
"""Notify reviewers of new item."""
# Implement email/Slack/Teams notification
pass
# Usage with AI system
class ResponsibleAISystem:
"""Complete responsible AI system with human oversight."""
def __init__(self, config: ResponsibleAIConfig):
self.guard = ResponsibleAIGuard(config)
self.review_queue = HumanReviewQueue()
self.bias_monitor = BiasMonitor()
async def process_request(
self,
prompt: str,
user_id: str,
context: Dict[str, Any]
) -> Dict[str, Any]:
"""Process request with full responsible AI pipeline."""
# Pre-request checks
should_proceed, block_reason = self.guard.pre_request_check(
prompt, user_id, context
)
if not should_proceed:
return {
"status": "blocked",
"reason": block_reason
}
# Generate response (would call Azure OpenAI here)
raw_response = await self._generate_response(prompt)
# Post-response checks
processed_response, needs_review = self.guard.post_response_check(
raw_response, user_id, context
)
# Bias monitoring
bias_analysis = self.bias_monitor.analyze_response(
processed_response, context
)
# Human review if needed
if needs_review:
review_id = self.review_queue.submit_for_review(
processed_response,
context,
RiskLevel.HIGH
)
return {
"status": "pending_review",
"review_id": review_id,
"message": "Response is being reviewed by a human."
}
return {
"status": "success",
"response": processed_response,
"bias_analysis": bias_analysis
}
    async def _generate_response(self, prompt: str) -> str:
        """Generate response from Azure OpenAI (see the sketch below)."""
        raise NotImplementedError
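_generate_response is left as a stub above. Here is a minimal sketch of wiring it to an Azure OpenAI chat deployment, assuming the openai 1.x Python SDK; the endpoint, key, API version, and deployment name are placeholders, and the instance-level wiring at the end is only for illustration:
from openai import AsyncAzureOpenAI

async def generate_response(prompt: str) -> str:
    """Sketch: call an Azure OpenAI chat deployment (all credentials are placeholders)."""
    client = AsyncAzureOpenAI(
        azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
        api_key="<your-api-key>",                                   # placeholder
        api_version="2024-02-01",                                   # assumption: a current GA API version
    )
    completion = await client.chat.completions.create(
        model="<your-deployment-name>",  # the Azure deployment name, not the base model name
        messages=[
            {"role": "system", "content": "You are a helpful, safe assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
    )
    return completion.choices[0].message.content or ""

async def main() -> None:
    config = ResponsibleAIConfig(escalation_contact="ai-oversight@company.com")
    system = ResponsibleAISystem(config)
    system._generate_response = generate_response  # or override the method in a subclass
    result = await system.process_request(
        prompt="How do I reset my password?",
        user_id="user-123",
        context={"channel": "support_chat"},
    )
    print(result["status"])

# asyncio.run(main())  # requires real Azure OpenAI credentials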
Transparency and Documentation
Create model cards and documentation:
@dataclass
class ModelCard:
"""Documentation for AI model deployment."""
model_name: str
version: str
deployment_date: datetime
# Model details
description: str
intended_use: List[str]
out_of_scope_uses: List[str]
# Performance
evaluation_metrics: Dict[str, float]
known_limitations: List[str]
# Ethical considerations
ethical_considerations: List[str]
bias_evaluation: Dict[str, Any]
# Maintenance
maintainer: str
update_frequency: str
feedback_channel: str
# Example model card
customer_support_model = ModelCard(
model_name="Customer Support Assistant",
version="1.0.0",
deployment_date=datetime(2023, 1, 8),
description="AI assistant for answering customer support queries about our products.",
intended_use=[
"Answering product questions",
"Providing troubleshooting guidance",
"Directing users to relevant documentation"
],
out_of_scope_uses=[
"Medical advice",
"Legal advice",
"Financial recommendations",
"Personal counseling"
],
evaluation_metrics={
"accuracy": 0.92,
"user_satisfaction": 0.87,
"escalation_rate": 0.15
},
known_limitations=[
"May not have information about products released after training",
"Cannot access user account information",
"May occasionally provide outdated pricing"
],
ethical_considerations=[
"Responses are reviewed for bias monthly",
"Sensitive topics are escalated to human agents",
"User data is not used for training"
],
bias_evaluation={
"demographic_parity": 0.91,
"equal_opportunity": 0.88
},
maintainer="AI Ethics Team",
update_frequency="Monthly review, quarterly retraining",
feedback_channel="ai-feedback@company.com"
)
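One way to keep a card like this close to the deployment is to serialize it and publish it with the release artifacts; here is a minimal standard-library sketch (the output path is illustrative):
import json
from dataclasses import asdict
from pathlib import Path

def export_model_card(card: ModelCard, path: str = "model_card.json") -> None:
    """Sketch: write a model card to JSON so it can be versioned and published."""
    payload = asdict(card)
    # datetime fields are not JSON-serializable; default=str renders them as readable text.
    Path(path).write_text(json.dumps(payload, indent=2, default=str))

# export_model_card(customer_support_model)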
Best Practices Summary
- Always filter content: Use Azure’s content filtering by default
- Log everything: Maintain audit trails for accountability
- Monitor for bias: Regularly check outputs across demographics
- Include human oversight: Especially for high-stakes decisions
- Be transparent: Disclose AI usage, document limitations
- Have escalation paths: Know when to involve humans
- Regular audits: Review and update AI systems and their safeguards on a recurring schedule