Back to Blog
2 min read

Data Mesh Architecture: Enabling AI at Scale with Decentralized Data

Data Mesh principles enable organizations to scale AI initiatives by treating data as products owned by domain teams. This decentralized approach addresses the bottlenecks of centralized data platforms.

Defining Data Products for AI

Create self-describing data products:

import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Dict, List, Optional

@dataclass
class DataProductMetadata:
    """Descriptive metadata attached to a single data product.

    The first six fields are required and positional; the remaining three
    have defaults, so a product is considered *not* AI-ready and without
    embeddings unless the owner opts in explicitly.

    Attributes:
        name: Product name.
        domain: Owning domain.
        owner: Owner of the product.
        description: Free-text description of the product.
        schema_version: Version label of the product's schema.
        quality_sla: Float values keyed by string (presumably metric name
            to target value; confirm with producers).
        tags: Free-form labels; every instance gets its own empty list.
        ai_ready: Whether the product is flagged for AI consumption.
        embedding_available: Whether embeddings are flagged as available.
    """

    # --- required: identity and ownership ---
    name: str
    domain: str
    owner: str
    description: str

    # --- required: contract ---
    schema_version: str
    quality_sla: Dict[str, float]

    # --- optional: discovery and AI flags ---
    # default_factory avoids sharing one list object between instances.
    tags: List[str] = field(default_factory=list)
    ai_ready: bool = False
    embedding_available: bool = False

@dataclass
class DataProduct:
    """A data product: its metadata plus schema, access and lineage details.

    Attributes:
        metadata: Descriptive metadata (name, domain, owner, AI flags, ...).
        schema: Schema of the product as a dictionary.
        access_patterns: Strings describing how the product can be accessed.
        refresh_frequency: Refresh cadence as a string.
        lineage: Strings identifying upstream sources (format not fixed by
            this class).
    """

    metadata: DataProductMetadata
    schema: Dict
    access_patterns: List[str]
    refresh_frequency: str
    lineage: List[str]

    def to_catalog_entry(self) -> dict:
        """Convert to data catalog format.

        Returns:
            A dictionary with the product's name, domain, owner, description,
            schema, quality metrics, AI feature flags, access patterns and
            lineage, plus a ``last_updated`` ISO-8601 timestamp. The
            timestamp is timezone-aware UTC (it ends in ``+00:00``).

            ``schema``, ``quality_metrics``, ``access`` and ``lineage`` are
            the product's own objects, not copies. ``schema_version``,
            ``tags`` and ``refresh_frequency`` are not part of the entry.
        """
        # datetime.utcnow() is deprecated and yields a naive value whose
        # ISO string carries no offset; an aware UTC timestamp is unambiguous
        # for whoever reads the catalog.
        generated_at = datetime.now(timezone.utc)

        return {
            "name": self.metadata.name,
            "domain": self.metadata.domain,
            "owner": self.metadata.owner,
            "description": self.metadata.description,
            "schema": self.schema,
            "quality_metrics": self.metadata.quality_sla,
            "ai_features": {
                "ai_ready": self.metadata.ai_ready,
                "embedding_available": self.metadata.embedding_available,
            },
            "access": self.access_patterns,
            "lineage": self.lineage,
            "last_updated": generated_at.isoformat(),
        }

class DataProductRegistry:
    """In-memory index of data products, mirrored to a catalog store.

    Products are keyed by ``"<domain>/<name>"``. Registering a product under
    an id that already exists replaces the previous entry.
    """

    def __init__(self, storage_client):
        """Create an empty registry.

        Args:
            storage_client: Object exposing ``save(path, entry)``; it is
                called once per registration with the catalog path and the
                product's catalog entry.
        """
        self.storage = storage_client
        self.products: Dict[str, DataProduct] = {}

    def register_product(self, product: DataProduct) -> str:
        """Register a new data product.

        Args:
            product: The product to persist and index.

        Returns:
            The product id, ``"<domain>/<name>"``.

        Raises:
            Whatever ``storage_client.save`` raises. In that case the
            in-memory index is left untouched.
        """
        product_id = f"{product.metadata.domain}/{product.metadata.name}"

        # Persist first: if the catalog write fails, the in-memory index must
        # not advertise a product that was never stored.
        self.storage.save(f"catalog/{product_id}.json", product.to_catalog_entry())
        self.products[product_id] = product

        return product_id

    def discover_ai_ready_products(self, domain: Optional[str] = None) -> List[DataProduct]:
        """Find data products ready for AI consumption.

        Args:
            domain: If given (and non-empty), only products of this domain
                are returned; otherwise all domains are searched.

        Returns:
            A new list of products whose metadata has ``ai_ready`` set, in
            registration order.
        """
        return [
            product
            for product in self.products.values()
            if product.metadata.ai_ready
            and (not domain or product.metadata.domain == domain)
        ]

Building Domain-Specific AI Services

Create AI services that consume domain data products:

class DomainAIService:
    """Answers questions about one domain using its AI-ready data products.

    The service describes every AI-ready product of the domain (name,
    description, schema) in a system prompt and forwards the user's question
    to a chat-completions style client.
    """

    def __init__(self, domain: str, registry: DataProductRegistry, ai_client, model: str = "gpt-4"):
        """Bind the service to a domain, a registry and an AI client.

        Args:
            domain: Domain whose data products are used as context.
            registry: Registry queried via ``discover_ai_ready_products``.
            ai_client: Client exposing ``chat.completions.create(...)``.
            model: Model name passed to the client; defaults to ``"gpt-4"``.
        """
        self.domain = domain
        self.registry = registry
        self.ai_client = ai_client
        self.model = model

    def build_domain_context(self, products: Optional[List[DataProduct]] = None) -> str:
        """Build context from domain data products.

        Args:
            products: Products to describe. When omitted, the registry is
                queried for the AI-ready products of this service's domain.

        Returns:
            Newline-joined text with name, description and JSON schema of
            each product, each product followed by a ``---`` separator line.
            Empty string when there are no products.
        """
        if products is None:
            products = self.registry.discover_ai_ready_products(self.domain)

        context_parts = []
        for product in products:
            context_parts.extend(
                (
                    f"Data Source: {product.metadata.name}",
                    f"Description: {product.metadata.description}",
                    f"Schema: {json.dumps(product.schema, indent=2)}",
                    "---",
                )
            )

        return "\n".join(context_parts)

    def answer_domain_question(self, question: str) -> dict:
        """Answer questions using domain data products.

        Args:
            question: The user's question, sent verbatim as the user message.

        Returns:
            A dictionary with ``answer`` (content of the first choice),
            ``domain`` and ``data_products_used`` (names of the products
            that were placed in the prompt).
        """
        # Query the registry once so the prompt context and the reported
        # product list always describe the same set of products.
        products = list(self.registry.discover_ai_ready_products(self.domain))
        context = self.build_domain_context(products)

        response = self.ai_client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": f"You are an expert in the {self.domain} domain. Use the following data product information to answer questions:\n\n{context}"},
                {"role": "user", "content": question},
            ],
        )

        return {
            "answer": response.choices[0].message.content,
            "domain": self.domain,
            "data_products_used": [product.metadata.name for product in products],
        }

Federated Governance

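Implement federated governance that balances domain autonomy with enterprise standards for AI-ready data products. For example, domain teams keep ownership of each product's schema and refresh frequency, while a shared policy — such as minimum `quality_sla` thresholds that must be met before a product may be flagged `ai_ready` — is checked when the product is registered.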

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.