Skip to content
Back to Blog
1 min read

GDPR and AI: Practical Compliance for ML Systems

This post shares practical, production-minded guidance on applying GDPR to machine-learning systems.

GDPR’s implications for AI are practical, not theoretical: logging decisions, maintaining provenance, and ensuring human review where necessary are the mechanisms that keep systems both useful and compliant. This post focuses on pragmatic patterns teams can implement today.

Key GDPR Articles for AI

Key GDPR Articles for AI

import re
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import List, Dict

@dataclass
class GDPRArticle:
    """Summary of one GDPR article (or article pair) as it applies to AI."""

    # Human-readable article reference, e.g. "Article 22".
    number: str
    # Short title of the article.
    title: str
    # One-line statement of why the article matters for AI systems.
    relevance_to_ai: str
    # Obligations the article imposes, as short bullet strings.
    requirements: List[str]
    # What those obligations mean in day-to-day practice, as bullet strings.
    practical_implications: List[str]

# Lookup table of the GDPR articles discussed in this post, keyed by a short
# identifier. Each entry pairs the article's requirements with what they mean
# in practice for an AI system.
ai_relevant_articles = dict(
    # Transparency: what data subjects must be told.
    article_13_14=GDPRArticle(
        number="Articles 13 & 14",
        title="Information to be provided",
        relevance_to_ai="Transparency about AI processing",
        requirements=[
            "Inform data subjects about automated decision-making",
            "Provide meaningful information about logic involved",
            "Explain significance and consequences",
        ],
        practical_implications=[
            "Clear disclosure that AI is being used",
            "Explanation of how AI affects the individual",
            "Description of data used in AI processing",
        ],
    ),
    # Solely automated decisions and the safeguards around them.
    article_22=GDPRArticle(
        number="Article 22",
        title="Automated individual decision-making",
        relevance_to_ai="Right not to be subject to automated decisions",
        requirements=[
            "Right not to be subject to solely automated decisions with legal/significant effects",
            "Exceptions: contract, legal authorization, explicit consent",
            "Suitable safeguards required including human intervention",
        ],
        practical_implications=[
            "Human review process for significant AI decisions",
            "Ability to contest AI decisions",
            "Express point of view to human reviewer",
        ],
    ),
    # Privacy built into the system from the outset.
    article_25=GDPRArticle(
        number="Article 25",
        title="Data protection by design and default",
        relevance_to_ai="Privacy-preserving AI design",
        requirements=[
            "Implement technical measures from start",
            "Process only necessary data",
            "Limit access to personal data",
        ],
        practical_implications=[
            "Privacy considerations in AI architecture",
            "Data minimization in training data",
            "Access controls for AI systems",
        ],
    ),
    # Impact assessments for high-risk processing.
    article_35=GDPRArticle(
        number="Article 35",
        title="Data Protection Impact Assessment",
        relevance_to_ai="DPIA required for high-risk AI",
        requirements=[
            "DPIA for systematic evaluation/profiling",
            "DPIA for large-scale processing",
            "Consult supervisory authority if high risk remains",
        ],
        practical_implications=[
            "Mandatory DPIA for most AI systems processing personal data",
            "Document risks and mitigations",
            "Regular review and updates",
        ],
    ),
)

Implementing Right to Explanation

class ExplainableAICompliance:
    """Build and log plain-language explanations for automated decisions.

    Each call to :meth:`generate_explanation` produces a dictionary that
    states what was decided, how, with which categories of data, what the
    consequences are, and which rights the data subject has. Every
    explanation is appended to ``decision_log`` so it can be audited later.

    Three decision types have tailored wording: ``"credit_scoring"``,
    ``"content_moderation"`` and ``"customer_segmentation"``. Any other type
    falls back to generic text.
    """

    def __init__(self):
        # Reserved for caller-supplied templates; not read by this class.
        self.explanation_templates = {}
        # In-memory, append-only record of every explanation generated.
        self.decision_log = []

    def generate_explanation(
        self,
        decision_id: str,
        decision_type: str,
        input_data: Dict,
        model_output: Dict,
        confidence: float
    ) -> Dict:
        """Generate and log an explanation for one automated decision.

        Args:
            decision_id: Identifier of the decision being explained.
            decision_type: Selects the wording; see the class docstring for
                the types with tailored text.
            input_data: The model inputs. Only the keys are inspected (to
                derive data categories); values are never copied into the
                explanation.
            model_output: The model's result; ``approved`` and ``segment``
                are read, depending on ``decision_type``.
            confidence: Model confidence as a fraction (0.876 -> "87.6%").

        Returns:
            The explanation dictionary. The same object is appended to
            ``decision_log``.
        """
        explanation = {
            "decision_id": decision_id,
            # Timezone-aware UTC so log entries are unambiguous across hosts.
            "timestamp": datetime.now(timezone.utc),
            "explanation": {
                "what_decision": self._explain_decision(decision_type, model_output),
                "how_made": self._explain_logic(decision_type),
                "data_used": self._explain_data_used(input_data),
                "consequences": self._explain_consequences(decision_type, model_output),
                "your_rights": self._explain_rights()
            },
            "confidence_level": f"{confidence * 100:.1f}%",
            "human_review_available": True
        }

        self.decision_log.append(explanation)
        return explanation

    def _explain_decision(self, decision_type: str, output: Dict) -> str:
        """Return a one-sentence statement of what was decided."""
        # Branch first and format afterwards so only the selected template
        # touches ``output``.
        if decision_type == "credit_scoring":
            outcome = "approved" if output.get("approved") else "not approved"
            return f"Your credit application was {outcome}."
        if decision_type == "content_moderation":
            outcome = "approved" if output.get("approved") else "flagged for review"
            return f"Your content was {outcome}."
        if decision_type == "customer_segmentation":
            return f"You have been categorized as a '{output.get('segment')}' customer."
        return "A decision was made regarding your request."

    def _explain_logic(self, decision_type: str) -> str:
        """Return a description of the factors and method behind the decision."""
        explanations = {
            "credit_scoring": """
Our AI system analyzed multiple factors including:
- Payment history patterns
- Account utilization
- Length of credit history
- Recent credit inquiries

The system uses a machine learning model trained on historical data
to predict creditworthiness. The model considers how these factors
relate to successful loan repayment in similar cases.
""",
            "content_moderation": """
Our AI system analyzed your content for:
- Policy compliance
- Community guidelines adherence
- Potential harmful content

The system uses natural language processing to understand context
and compare against our content policies.
""",
            "customer_segmentation": """
Our AI system analyzed your interactions including:
- Purchase history
- Engagement patterns
- Preferences indicated

This helps us provide more relevant recommendations and services.
"""
        }
        return explanations.get(decision_type, "Our system used automated analysis to make this decision.")

    def _explain_data_used(self, input_data: Dict) -> Dict:
        """Summarise the categories of data used, without exposing values.

        Each input key is assigned to the first category whose keyword it
        contains (case-insensitive); keys matching none are reported as
        "Other Information". Categories are listed in order of first
        appearance.
        """
        # A dict is used as an ordered set: insertion order is preserved and
        # repeated categories collapse to one entry.
        data_categories = {}

        for key in input_data:
            # str() keeps non-string keys (e.g. ints) from raising.
            name = str(key).lower()
            if "name" in name or "email" in name:
                data_categories["Identity Information"] = True
            elif "payment" in name or "credit" in name:
                data_categories["Financial Information"] = True
            elif "purchase" in name or "order" in name:
                data_categories["Transaction History"] = True
            else:
                data_categories["Other Information"] = True

        return {
            "categories_used": list(data_categories),
            "note": "You can request full details of your personal data under your right to access."
        }

    def _explain_consequences(self, decision_type: str, output: Dict) -> str:
        """Return a sentence describing how the decision affects the person.

        ``output`` is accepted for symmetry with ``_explain_decision`` but is
        not read.
        """
        consequences = {
            "credit_scoring": "This decision affects whether you can receive the requested credit product and at what terms.",
            "content_moderation": "This decision affects the visibility of your content on our platform.",
            "customer_segmentation": "This categorization influences the recommendations and offers you receive."
        }
        return consequences.get(decision_type, "This decision may affect our services to you.")

    def _explain_rights(self) -> Dict:
        """Return the data-subject rights text included in every explanation."""
        return {
            "right_to_human_review": "You can request a human to review this decision.",
            "right_to_contest": "You can contest this decision and provide additional information.",
            "right_to_access": "You can request access to all personal data we hold about you.",
            "right_to_rectification": "You can request correction of inaccurate data.",
            "contact": "privacy@company.com"
        }

    def request_human_review(self, decision_id: str, reason: str) -> Dict:
        """Acknowledge a request for human review of a decision.

        Args:
            decision_id: Identifier of the decision to be reviewed; used to
                derive the request id (``HR-<decision_id>``).
            reason: The data subject's stated reason.

        Returns:
            An acknowledgement with request id, status, expected response
            time and a user-facing message.
        """
        # NOTE(review): ``reason`` is neither stored nor returned, and the
        # request is not recorded anywhere — a production system would need
        # to persist both for the reviewer.
        return {
            "request_id": f"HR-{decision_id}",
            "status": "Submitted",
            "expected_response": "5 business days",
            "message": "Your request for human review has been submitted. A human reviewer will examine your case and contact you."
        }

Data Minimization for AI

class DataMinimizationFramework:
    """Track which data fields an AI system uses and whether it needs them.

    Fields are registered with their category, purposes, retention period and
    a flag saying whether the AI actually needs them. A field list can then be
    assessed against a purpose, and anonymization techniques can be suggested
    for individual fields.
    """

    # Inserts a separator at lower/digit -> upper transitions so that
    # camelCase names ("userAge") split into the same tokens as snake_case.
    _CAMEL_BOUNDARY = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
    # Any run of non-word characters or underscores separates tokens.
    _TOKEN_SEPARATOR = re.compile(r"[\W_]+")

    def __init__(self):
        # field name -> {"category", "purposes", "retention_days", "necessary_for_ai"}
        self.data_inventory = {}
        # Reserved for a purpose -> fields index; not populated by this class.
        self.purpose_mapping = {}

    def register_data_field(
        self,
        field_name: str,
        data_category: str,
        purposes: List[str],
        retention_days: int,
        necessary_for_ai: bool
    ):
        """Record a data field and the metadata used by later assessments.

        Registering the same ``field_name`` again replaces the earlier entry.

        Args:
            field_name: Name of the field, used as the inventory key.
            data_category: Free-form category label for the field.
            purposes: Processing purposes the field is collected for.
            retention_days: How long the field is retained, in days.
            necessary_for_ai: Whether the AI system actually needs the field.
        """
        self.data_inventory[field_name] = {
            "category": data_category,
            # Copy so later mutation of the caller's list cannot silently
            # change what the inventory says.
            "purposes": list(purposes),
            "retention_days": retention_days,
            "necessary_for_ai": necessary_for_ai
        }

    def assess_minimization(self, current_fields: List[str], ai_purpose: str) -> Dict:
        """Assess whether the fields in use are the minimum for a purpose.

        A field counts as necessary only when it is registered, flagged
        ``necessary_for_ai`` and lists ``ai_purpose`` among its purposes.
        Registered fields failing that test are reported as unnecessary.
        Unregistered fields appear only in the recommendations, but still
        count towards the total when the score is computed.

        Args:
            current_fields: Names of the fields currently fed to the system.
            ai_purpose: The purpose the fields are being assessed against.

        Returns:
            A dict with ``purpose``, ``fields_assessed``, ``necessary_fields``,
            ``unnecessary_fields``, ``recommendations`` and
            ``minimization_score`` (necessary / total, ``0.0`` when
            ``current_fields`` is empty).
        """
        necessary = []
        unnecessary = []
        recommendations = []

        for field in current_fields:
            info = self.data_inventory.get(field)
            if info is None:
                recommendations.append(
                    f"Field '{field}' not in data inventory - review necessity"
                )
                continue

            if info["necessary_for_ai"] and ai_purpose in info["purposes"]:
                necessary.append(field)
            else:
                unnecessary.append(field)
                recommendations.append(
                    f"Consider removing '{field}' - not necessary for {ai_purpose}"
                )

        # Always a float, including the empty-input case.
        score = len(necessary) / len(current_fields) if current_fields else 0.0

        return {
            "purpose": ai_purpose,
            "fields_assessed": len(current_fields),
            "necessary_fields": necessary,
            "unnecessary_fields": unnecessary,
            "recommendations": recommendations,
            "minimization_score": score
        }

    def anonymization_options(self, field_name: str) -> List[Dict]:
        """Suggest anonymization techniques for a field, based on its name.

        The name is split into lowercase word tokens (on underscores,
        punctuation, whitespace and camelCase boundaries) and compared with
        the known field kinds: ``name``, ``email``, ``age``, ``location``.
        Matching whole tokens rather than substrings keeps unrelated names
        such as ``message`` or ``page`` from being treated as an age field.

        Args:
            field_name: Field name, e.g. ``"user_age"`` or ``"homeLocation"``.

        Returns:
            A list of ``{"technique", "description"}`` dicts for the first
            matching field kind, or a single "Review required" entry when no
            kind matches.
        """
        techniques = {
            "name": [
                {"technique": "Pseudonymization", "description": "Replace with unique identifier"},
                {"technique": "Generalization", "description": "Use initials only"}
            ],
            "email": [
                {"technique": "Hashing", "description": "One-way hash for matching"},
                {"technique": "Domain only", "description": "Keep only domain part"}
            ],
            "age": [
                {"technique": "Bucketing", "description": "Use age ranges (18-25, 26-35, etc.)"},
                {"technique": "Perturbation", "description": "Add small random noise"}
            ],
            "location": [
                {"technique": "Generalization", "description": "Use region instead of exact address"},
                {"technique": "K-anonymity", "description": "Ensure k individuals share same location"}
            ]
        }

        separated = self._CAMEL_BOUNDARY.sub("_", field_name)
        tokens = {
            token
            for token in self._TOKEN_SEPARATOR.split(separated.lower())
            if token
        }

        # Iterate the table (not the token set) so that precedence between
        # field kinds stays deterministic.
        for kind, options in techniques.items():
            if kind in tokens:
                return options

        return [{"technique": "Review required", "description": "Manual assessment needed"}]

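Tomorrow, we’ll explore the EU AI Act and its implications!

## Takeaways

- Log every automated decision together with a plain-language explanation of what was decided, how it was made, and which categories of data were used.
- Offer human review and a way to contest any decision with legal or similarly significant effects, as Article 22 requires.
- Register each data field with its purpose and retention period, then drop or anonymize the fields the model does not actually need.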

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.