Skip to content
Back to Blog
1 min read

Data Mesh Architecture: Enabling AI at Scale with Decentralized Data

I wrote “Data Mesh Architecture: Enabling AI at Scale with Decentralized Data” to share practical, production-minded guidance on this topic.

Defining Data Products for AI

Create self-describing data products:

import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import List, Dict, Optional

@dataclass
class DataProductMetadata:
    """Descriptive metadata attached to a data product.

    The first six fields are required. ``tags`` defaults to a fresh empty
    list per instance and both AI flags default to ``False``.
    """

    # Identity, ownership and human-readable summary.
    name: str
    domain: str
    owner: str
    description: str
    # Version label of the product's schema.
    schema_version: str
    # String-keyed float values describing the quality SLA.
    quality_sla: Dict[str, float]
    # Built by a factory so instances never share one list.
    tags: List[str] = field(default_factory=list)
    # AI-related flags; both stay off unless set explicitly.
    ai_ready: bool = field(default=False)
    embedding_available: bool = field(default=False)

@dataclass
class DataProduct:
    """A data product: metadata plus schema, access, refresh and lineage details."""

    metadata: DataProductMetadata
    schema: Dict
    access_patterns: List[str]
    refresh_frequency: str
    lineage: List[str]

    def to_catalog_entry(self) -> dict:
        """Convert to data catalog format.

        Returns:
            A dict holding the product's name, domain, owner, description,
            schema, quality metrics, AI feature flags, access patterns and
            lineage, plus a ``last_updated`` ISO-8601 timestamp taken at
            call time. The schema, quality, access and lineage values are
            the product's own objects, not copies.
        """
        meta = self.metadata
        return {
            "name": meta.name,
            "domain": meta.domain,
            "owner": meta.owner,
            "description": meta.description,
            "schema": self.schema,
            "quality_metrics": meta.quality_sla,
            "ai_features": {
                "ai_ready": meta.ai_ready,
                "embedding_available": meta.embedding_available,
            },
            "access": self.access_patterns,
            "lineage": self.lineage,
            # Timezone-aware UTC, so the string carries an explicit
            # "+00:00" offset. datetime.utcnow() is deprecated and returns
            # a naive value that readers cannot tell is UTC.
            "last_updated": datetime.now(timezone.utc).isoformat(),
        }

class DataProductRegistry:
    """In-memory index of data products, mirrored to a storage client."""

    def __init__(self, storage_client):
        """Create an empty registry.

        Args:
            storage_client: Object whose ``save(path, payload)`` method is
                called to persist each catalog entry.
        """
        self.storage = storage_client
        self.products: Dict[str, DataProduct] = {}

    def register_product(self, product: DataProduct) -> str:
        """Register a new data product.

        The catalog entry is saved before the product is indexed in memory,
        so an exception from ``storage.save`` leaves the registry unchanged
        instead of exposing a product that was never written to the catalog.
        Registering an id that already exists replaces the earlier product.

        Args:
            product: The data product to register.

        Returns:
            The product id, formatted as ``"<domain>/<name>"``.
        """
        product_id = f"{product.metadata.domain}/{product.metadata.name}"

        # Store in catalog first; only index once the save has succeeded.
        self.storage.save(f"catalog/{product_id}.json", product.to_catalog_entry())
        self.products[product_id] = product

        return product_id

    def discover_ai_ready_products(self, domain: Optional[str] = None) -> List[DataProduct]:
        """Find data products ready for AI consumption.

        Args:
            domain: When given (and non-empty), only products whose metadata
                domain equals this value are returned; ``None`` or an empty
                string means all domains.

        Returns:
            The registered products whose ``ai_ready`` flag is set, in
            registration order.
        """
        return [
            product
            for product in self.products.values()
            if product.metadata.ai_ready
            and (not domain or product.metadata.domain == domain)
        ]

Building Domain-Specific AI Services

Create AI services that consume domain data products:

class DomainAIService:
    """Answers questions about one domain using its AI-ready data products."""

    def __init__(self, domain: str, registry: DataProductRegistry, ai_client, model: str = "gpt-4"):
        """Bind the service to a domain, a registry and a chat client.

        Args:
            domain: Domain whose data products feed the prompt context.
            registry: Registry queried for AI-ready products.
            ai_client: Client exposing ``chat.completions.create(...)``.
            model: Model name passed to the chat completion call.
        """
        self.domain = domain
        self.registry = registry
        self.ai_client = ai_client
        self.model = model

    def build_domain_context(self, products: Optional[List[DataProduct]] = None) -> str:
        """Build context from domain data products.

        Args:
            products: Products to describe. When ``None``, the registry is
                queried for this domain's AI-ready products.

        Returns:
            Newline-joined text with each product's name, description and
            JSON-formatted schema, each product followed by a ``---`` line.
            Empty string when there are no products.
        """
        if products is None:
            products = self.registry.discover_ai_ready_products(self.domain)

        context_parts = []
        for product in products:
            context_parts.extend((
                f"Data Source: {product.metadata.name}",
                f"Description: {product.metadata.description}",
                f"Schema: {json.dumps(product.schema, indent=2)}",
                "---",
            ))

        return "\n".join(context_parts)

    def answer_domain_question(self, question: str) -> dict:
        """Answer questions using domain data products.

        Args:
            question: The user's question, sent as the user message.

        Returns:
            A dict with the model's ``answer``, the service ``domain`` and
            the names of the ``data_products_used`` to build the prompt.
        """
        # Discover once so the prompt context and the reported product list
        # always describe the same set of products.
        products = self.registry.discover_ai_ready_products(self.domain)
        context = self.build_domain_context(products)

        system_prompt = (
            f"You are an expert in the {self.domain} domain. "
            "Use the following data product information to answer questions:"
            f"\n\n{context}"
        )

        response = self.ai_client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
        )

        return {
            "answer": response.choices[0].message.content,
            "domain": self.domain,
            "data_products_used": [p.metadata.name for p in products],
        }

Federated Governance

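Implement federated governance that balances domain autonomy with enterprise standards for AI-ready data products.

Takeaways

Describe each data product with explicit metadata, register it in a shared catalog, and let domain AI services build their context from the products flagged as AI-ready. As a next step, define the metadata and quality SLAs for one domain's data products and register them before wiring an AI service on top.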

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.