
GDPR and AI: Practical Compliance for ML Systems

GDPR presents unique challenges for AI systems, particularly around automated decision-making and the right to explanation. Let’s explore practical compliance approaches.

Key GDPR Articles for AI

from dataclasses import dataclass
from datetime import datetime
from typing import List, Dict

@dataclass
class GDPRArticle:
    number: str
    title: str
    relevance_to_ai: str
    requirements: List[str]
    practical_implications: List[str]

ai_relevant_articles = {
    "article_13_14": GDPRArticle(
        number="Articles 13 & 14",
        title="Information to be provided",
        relevance_to_ai="Transparency about AI processing",
        requirements=[
            "Inform data subjects about automated decision-making",
            "Provide meaningful information about logic involved",
            "Explain significance and consequences"
        ],
        practical_implications=[
            "Clear disclosure that AI is being used",
            "Explanation of how AI affects the individual",
            "Description of data used in AI processing"
        ]
    ),
    "article_22": GDPRArticle(
        number="Article 22",
        title="Automated individual decision-making",
        relevance_to_ai="Right not to be subject to automated decisions",
        requirements=[
            "Right not to be subject to solely automated decisions with legal/significant effects",
            "Exceptions: contract, legal authorization, explicit consent",
            "Suitable safeguards required including human intervention"
        ],
        practical_implications=[
            "Human review process for significant AI decisions",
            "Ability to contest AI decisions",
            "Express point of view to human reviewer"
        ]
    ),
    "article_25": GDPRArticle(
        number="Article 25",
        title="Data protection by design and default",
        relevance_to_ai="Privacy-preserving AI design",
        requirements=[
            "Implement technical measures from start",
            "Process only necessary data",
            "Limit access to personal data"
        ],
        practical_implications=[
            "Privacy considerations in AI architecture",
            "Data minimization in training data",
            "Access controls for AI systems"
        ]
    ),
    "article_35": GDPRArticle(
        number="Article 35",
        title="Data Protection Impact Assessment",
        relevance_to_ai="DPIA required for high-risk AI",
        requirements=[
            "DPIA for systematic evaluation/profiling",
            "DPIA for large-scale processing",
            "Consult supervisory authority if high risk remains"
        ],
        practical_implications=[
            "Mandatory DPIA for most AI systems processing personal data",
            "Document risks and mitigations",
            "Regular review and updates"
        ]
    )
}
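
To make this mapping actionable, it can be rendered as a quick checklist for compliance reviews. The helper below is a minimal sketch that assumes only the ai_relevant_articles dictionary defined above; the output format is illustrative, not a formal compliance artifact.

def print_compliance_checklist(articles: Dict[str, GDPRArticle]) -> None:
    """Print each article's practical implications as an unchecked checklist."""
    for article in articles.values():
        print(f"\n{article.number}: {article.title}")
        print(f"  Relevance: {article.relevance_to_ai}")
        for implication in article.practical_implications:
            print(f"  [ ] {implication}")

print_compliance_checklist(ai_relevant_articles)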

Implementing Right to Explanation

class ExplainableAICompliance:
    """Implement GDPR-compliant explanations for AI decisions."""

    def __init__(self):
        self.explanation_templates = {}
        self.decision_log = []

    def generate_explanation(
        self,
        decision_id: str,
        decision_type: str,
        input_data: Dict,
        model_output: Dict,
        confidence: float
    ) -> Dict:
        """Generate a GDPR-compliant explanation."""
        explanation = {
            "decision_id": decision_id,
            "timestamp": datetime.now(),
            "explanation": {
                "what_decision": self._explain_decision(decision_type, model_output),
                "how_made": self._explain_logic(decision_type),
                "data_used": self._explain_data_used(input_data),
                "consequences": self._explain_consequences(decision_type, model_output),
                "your_rights": self._explain_rights()
            },
            "confidence_level": f"{confidence * 100:.1f}%",
            "human_review_available": True
        }

        self.decision_log.append(explanation)
        return explanation

    def _explain_decision(self, decision_type: str, output: Dict) -> str:
        """Explain what decision was made."""
        templates = {
            "credit_scoring": f"Your credit application was {'approved' if output.get('approved') else 'not approved'}.",
            "content_moderation": f"Your content was {'approved' if output.get('approved') else 'flagged for review'}.",
            "customer_segmentation": f"You have been categorized as a '{output.get('segment')}' customer."
        }
        return templates.get(decision_type, "A decision was made regarding your request.")

    def _explain_logic(self, decision_type: str) -> str:
        """Explain the logic/methodology used."""
        explanations = {
            "credit_scoring": """
Our AI system analyzed multiple factors including:
- Payment history patterns
- Account utilization
- Length of credit history
- Recent credit inquiries

The system uses a machine learning model trained on historical data
to predict creditworthiness. The model considers how these factors
relate to successful loan repayment in similar cases.
""",
            "content_moderation": """
Our AI system analyzed your content for:
- Policy compliance
- Community guidelines adherence
- Potential harmful content

The system uses natural language processing to understand context
and compare against our content policies.
""",
            "customer_segmentation": """
Our AI system analyzed your interactions including:
- Purchase history
- Engagement patterns
- Preferences indicated

This helps us provide more relevant recommendations and services.
"""
        }
        return explanations.get(decision_type, "Our system used automated analysis to make this decision.")

    def _explain_data_used(self, input_data: Dict) -> Dict:
        """Explain what categories of data were used, without exposing raw values."""
        # Categorize field names into broad data categories
        data_categories = set()

        for key in input_data.keys():
            key_lower = key.lower()
            if "name" in key_lower or "email" in key_lower:
                data_categories.add("Identity Information")
            elif "payment" in key_lower or "credit" in key_lower:
                data_categories.add("Financial Information")
            elif "purchase" in key_lower or "order" in key_lower:
                data_categories.add("Transaction History")
            else:
                data_categories.add("Other Information")

        return {
            "categories_used": sorted(data_categories),
            "note": "You can request full details of your personal data under your right to access."
        }

    def _explain_consequences(self, decision_type: str, output: Dict) -> str:
        """Explain consequences of the decision."""
        consequences = {
            "credit_scoring": "This decision affects whether you can receive the requested credit product and at what terms.",
            "content_moderation": "This decision affects the visibility of your content on our platform.",
            "customer_segmentation": "This categorization influences the recommendations and offers you receive."
        }
        return consequences.get(decision_type, "This decision may affect our services to you.")

    def _explain_rights(self) -> Dict:
        """Explain data subject rights."""
        return {
            "right_to_human_review": "You can request a human to review this decision.",
            "right_to_contest": "You can contest this decision and provide additional information.",
            "right_to_access": "You can request access to all personal data we hold about you.",
            "right_to_rectification": "You can request correction of inaccurate data.",
            "contact": "privacy@company.com"
        }

    def request_human_review(self, decision_id: str, reason: str) -> Dict:
        """Process a request for human review (Article 22 safeguard)."""
        # Record the requester's reason so the human reviewer can see it
        return {
            "request_id": f"HR-{decision_id}",
            "reason": reason,
            "status": "Submitted",
            "expected_response": "5 business days",
            "message": "Your request for human review has been submitted. A human reviewer will examine your case and contact you."
        }
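
To show how the class fits together, here is a sketch of explaining a credit-scoring decision end to end. The decision ID, input fields, and model output are hypothetical values chosen purely for illustration.

# Illustrative usage; all values below are hypothetical.
compliance = ExplainableAICompliance()

explanation = compliance.generate_explanation(
    decision_id="D-2024-0042",
    decision_type="credit_scoring",
    input_data={"name": "Jane Doe", "credit_utilization": 0.45, "payment_history": "on_time"},
    model_output={"approved": False, "score": 612},
    confidence=0.87
)

print(explanation["explanation"]["what_decision"])
print(explanation["explanation"]["your_rights"]["right_to_human_review"])

# The data subject can then escalate to a human reviewer (Article 22 safeguard).
review = compliance.request_human_review("D-2024-0042", reason="My income information was outdated")
print(review["status"], "-", review["expected_response"])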

Data Minimization for AI

class DataMinimizationFramework:
    """Implement data minimization for AI systems."""

    def __init__(self):
        self.data_inventory = {}
        self.purpose_mapping = {}

    def register_data_field(
        self,
        field_name: str,
        data_category: str,
        purposes: List[str],
        retention_days: int,
        necessary_for_ai: bool
    ):
        """Register a data field with its purposes."""
        self.data_inventory[field_name] = {
            "category": data_category,
            "purposes": purposes,
            "retention_days": retention_days,
            "necessary_for_ai": necessary_for_ai
        }

    def assess_minimization(self, current_fields: List[str], ai_purpose: str) -> Dict:
        """Assess if data collection is minimized."""
        assessment = {
            "purpose": ai_purpose,
            "fields_assessed": len(current_fields),
            "necessary_fields": [],
            "unnecessary_fields": [],
            "recommendations": []
        }

        for field in current_fields:
            if field in self.data_inventory:
                info = self.data_inventory[field]
                if info["necessary_for_ai"] and ai_purpose in info["purposes"]:
                    assessment["necessary_fields"].append(field)
                else:
                    assessment["unnecessary_fields"].append(field)
                    assessment["recommendations"].append(
                        f"Consider removing '{field}' - not necessary for {ai_purpose}"
                    )
            else:
                assessment["recommendations"].append(
                    f"Field '{field}' not in data inventory - review necessity"
                )

        assessment["minimization_score"] = (
            len(assessment["necessary_fields"]) /
            len(current_fields) if current_fields else 0
        )

        return assessment

    def anonymization_options(self, field_name: str) -> List[Dict]:
        """Suggest anonymization techniques for a field."""
        techniques = {
            "name": [
                {"technique": "Pseudonymization", "description": "Replace with unique identifier"},
                {"technique": "Generalization", "description": "Use initials only"}
            ],
            "email": [
                {"technique": "Hashing", "description": "One-way hash for matching"},
                {"technique": "Domain only", "description": "Keep only domain part"}
            ],
            "age": [
                {"technique": "Bucketing", "description": "Use age ranges (18-25, 26-35, etc.)"},
                {"technique": "Perturbation", "description": "Add small random noise"}
            ],
            "location": [
                {"technique": "Generalization", "description": "Use region instead of exact address"},
                {"technique": "K-anonymity", "description": "Ensure k individuals share same location"}
            ]
        }

        # Find matching techniques
        for key in techniques:
            if key in field_name.lower():
                return techniques[key]

        return [{"technique": "Review required", "description": "Manual assessment needed"}]
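
A brief usage sketch ties the framework together: register known fields, assess a proposed feature set against a processing purpose, and look up anonymization options for fields that must be kept. The field names, purposes, and retention periods below are hypothetical.

# Illustrative usage; fields, purposes, and retention periods are hypothetical.
framework = DataMinimizationFramework()

framework.register_data_field("email", "contact", ["marketing"], retention_days=365, necessary_for_ai=False)
framework.register_data_field("purchase_history", "behavioural", ["recommendations"], retention_days=730, necessary_for_ai=True)

assessment = framework.assess_minimization(
    current_fields=["email", "purchase_history", "device_fingerprint"],
    ai_purpose="recommendations"
)
print(f"Minimization score: {assessment['minimization_score']:.2f}")
for recommendation in assessment["recommendations"]:
    print("-", recommendation)

# Suggest anonymization techniques for a field that must be retained.
for option in framework.anonymization_options("email"):
    print(option["technique"], "-", option["description"])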

Tomorrow, we’ll explore the EU AI Act and its implications!

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.