4 min read
Fabric Domains: Organizing Your Data Mesh
Fabric Domains: Organizing Your Data Mesh
Fabric Domains bring data mesh principles to Microsoft’s analytics platform. They provide a way to organize workspaces by business area, enabling decentralized ownership with centralized governance.
Understanding Fabric Domains
Domains are logical groupings of workspaces that align with business capabilities:
from dataclasses import dataclass, field
from typing import List, Optional
from enum import Enum
class DomainType(Enum):
    """Kinds of grouping a Fabric domain can represent."""

    # Business functions, e.g. Sales, Marketing, Finance.
    BUSINESS = "Business"
    # Technical areas, e.g. Data Engineering, Platform.
    TECHNICAL = "Technical"
    # Regional divisions.
    GEOGRAPHIC = "Geographic"
    # Product lines.
    PRODUCT = "Product"
@dataclass
class FabricDomain:
    """A named group of workspaces, optionally containing nested sub-domains."""

    # Required descriptive fields.
    name: str
    description: str
    domain_type: DomainType
    owners: List[str]

    # Optional collections; default_factory gives every instance its own list.
    workspaces: List[str] = field(default_factory=list)
    sub_domains: List['FabricDomain'] = field(default_factory=list)
    data_products: List[str] = field(default_factory=list)

    def add_workspace(self, workspace_name: str):
        """Append ``workspace_name`` to this domain's workspace list."""
        self.workspaces.append(workspace_name)

    def add_sub_domain(self, sub_domain: 'FabricDomain'):
        """Append ``sub_domain`` to this domain's list of sub-domains."""
        self.sub_domains.append(sub_domain)
# Example: a business domain with three workspaces and three data products.
sales_domain = FabricDomain(
    name="Sales",
    description="All sales-related analytics and data products",
    domain_type=DomainType.BUSINESS,
    owners=["sales-data-owner@company.com"],
    workspaces=[
        "Sales-Analytics-Prod",
        "Sales-Analytics-Dev",
        "Sales-Reports",
    ],
    data_products=[
        "Customer 360",
        "Sales Performance Dashboard",
        "Revenue Forecasting Model",
    ],
)

# Nest a sub-domain, with its own owner and workspace, under Sales.
sales_domain.add_sub_domain(
    FabricDomain(
        name="Sales Operations",
        description="Sales operational analytics",
        domain_type=DomainType.BUSINESS,
        owners=["sales-ops@company.com"],
        workspaces=["Sales-Ops-Analytics"],
    )
)
Domain Architecture Patterns
Centralized vs Federated
# Centralized model: a single platform domain owns the shared assets and
# every other domain is listed as a read-only consumer.
centralized_structure = {
    "platform_domain": {
        "name": "Data Platform",
        "owner": "central-data-team@company.com",
        "responsibilities": [
            "Data infrastructure",
            "Shared semantic models",
            "Data quality standards",
            "Governance policies",
        ],
        "workspaces": [
            "Platform-Core",
            "Shared-Data-Products",
            "Data-Quality",
        ],
    },
    "consumer_domains": [
        {"name": "Sales", "access": "read"},
        {"name": "Marketing", "access": "read"},
        {"name": "Finance", "access": "read"},
    ],
}
# Federated model (data mesh): each domain owns its data products, while the
# "governance" entry records centralized standards with federated
# implementation.
federated_structure = {
    "domains": [
        # Business domains: high autonomy, each publishing shared products.
        {
            "name": "Sales",
            "owner": "sales-team@company.com",
            "autonomy": "high",
            "responsibilities": [
                "Own sales data products",
                "Define sales metrics",
                "Manage sales pipelines",
            ],
            "shared_products": ["Customer 360", "Revenue Metrics"],
        },
        {
            "name": "Marketing",
            "owner": "marketing-team@company.com",
            "autonomy": "high",
            "responsibilities": [
                "Own marketing data products",
                "Campaign analytics",
                "Lead scoring",
            ],
            "shared_products": ["Campaign Performance", "Lead Pipeline"],
        },
        # Platform domain: moderate autonomy and no "shared_products" key.
        {
            "name": "Platform",
            "owner": "platform-team@company.com",
            "autonomy": "moderate",
            "responsibilities": [
                "Provide infrastructure",
                "Enforce standards",
                "Enable self-service",
            ],
        },
    ],
    "governance": {
        "standards": "centralized",
        "implementation": "federated",
    },
}
Implementing Domain Boundaries
from typing import Dict, Set
class DomainBoundaryManager:
    """Track registered domains, workspace ownership, and data contracts.

    The manager keeps three in-memory lookups: registered domains by name,
    the owning domain name for each workspace, and the data-sharing
    contracts defined between domains.
    """

    def __init__(self):
        # Domain name -> domain object, for every registered (sub-)domain.
        self.domains: Dict[str, "FabricDomain"] = {}
        # Workspace name -> name of the domain that directly lists it.
        self.workspace_domain_map: Dict[str, str] = {}
        # Contract id -> contract record built by define_data_contract().
        self.data_contracts: Dict[str, dict] = {}

    def register_domain(self, domain: "FabricDomain"):
        """Register a domain, its workspaces, and all nested sub-domains.

        Each sub-domain is registered under its own name, so a workspace
        listed on a sub-domain resolves to that sub-domain instead of being
        left unmapped. Registering a name or workspace that already exists
        overwrites the earlier entry.

        Args:
            domain: The domain to register. Its ``name``, ``workspaces`` and
                ``sub_domains`` attributes are read.
        """
        pending = [domain]
        visited = set()  # ids of domains already handled; guards against cycles
        while pending:
            current = pending.pop()
            if id(current) in visited:
                continue
            visited.add(id(current))

            self.domains[current.name] = current
            for workspace in current.workspaces:
                self.workspace_domain_map[workspace] = current.name

            # Reversed so the stack pops sub-domains in declaration order;
            # parents are always processed before their children, so when a
            # workspace appears on both, the more specific sub-domain wins.
            pending.extend(reversed(current.sub_domains))

    def get_domain_for_workspace(self, workspace: str) -> Optional[str]:
        """Return the name of the domain that owns ``workspace``.

        Returns:
            The owning domain's name, or ``None`` when the workspace has not
            been registered.
        """
        return self.workspace_domain_map.get(workspace)

    def define_data_contract(
        self,
        source_domain: str,
        target_domain: str,
        data_product: str,
        sla: dict
    ):
        """Define a data sharing contract between two domains.

        The contract is stored under the id
        ``"<source_domain>-><target_domain>:<data_product>"`` with status
        ``"active"``; defining the same combination again replaces the
        earlier record.

        Args:
            source_domain: Name of the domain providing the data product.
            target_domain: Name of the domain consuming the data product.
            data_product: Name of the shared data product.
            sla: Service-level terms, stored on the contract as given.
        """
        contract_id = f"{source_domain}->{target_domain}:{data_product}"
        self.data_contracts[contract_id] = {
            "source": source_domain,
            "target": target_domain,
            "data_product": data_product,
            "sla": sla,
            "status": "active"
        }

    def get_domain_dependencies(self, domain_name: str) -> Set[str]:
        """Return the names of the domains that ``domain_name`` depends on.

        A domain depends on every source domain of a contract in which it is
        the target. The result is empty when no such contract exists.
        """
        return {
            contract["source"]
            for contract in self.data_contracts.values()
            if contract["target"] == domain_name
        }
# Usage: create a manager and register the Sales domain defined earlier.
manager = DomainBoundaryManager()
manager.register_domain(sales_domain)

# Record a contract under which Sales supplies "Revenue Metrics" to Finance.
manager.define_data_contract(
    source_domain="Sales",
    target_domain="Finance",
    data_product="Revenue Metrics",
    sla={
        "freshness": "daily",
        "quality_threshold": 0.99,
        "availability": "99.9%",
    },
)
Domain Governance
@dataclass
class DomainGovernancePolicy:
    """Governance settings attached to a single domain."""

    # Name of the domain the policy applies to.
    domain_name: str
    # Naming pattern per item kind, e.g. "lakehouses" -> "lh_{domain}_{subject}".
    naming_conventions: dict
    # Names of the metadata fields every item is required to carry.
    required_metadata: List[str]
    # Data-quality rules, one dict per rule.
    quality_rules: List[dict]
    # Access setting per access scope.
    access_patterns: dict
# Governance policy for the Sales domain.
sales_governance = DomainGovernancePolicy(
    domain_name="Sales",
    # One naming pattern per item kind; {placeholders} are filled per item.
    naming_conventions={
        "workspaces": "{Domain}-{Environment}-{Purpose}",
        "lakehouses": "lh_{domain}_{subject}",
        "warehouses": "wh_{domain}_{subject}",
        "reports": "rpt_{domain}_{name}",
    },
    required_metadata=[
        "data_owner",
        "data_steward",
        "sensitivity_label",
        "refresh_frequency",
        "business_glossary_terms",
    ],
    quality_rules=[
        {"rule": "completeness", "threshold": 0.95},
        {"rule": "uniqueness", "threshold": 1.0, "columns": ["customer_id"]},
        {"rule": "freshness", "max_age_hours": 24},
    ],
    access_patterns={
        "default_access": "domain_members_read",
        "cross_domain": "request_approval",
        "external": "prohibited",
    },
)
def validate_item_name(
    item_name: str,
    item_type: str,
    policy: "DomainGovernancePolicy",
) -> bool:
    """Validate an item name against the policy's naming convention.

    The convention is a pattern such as ``"lh_{domain}_{subject}"``. Literal
    text (prefixes and separators) must appear in the name exactly, and each
    ``{placeholder}`` must be filled by at least one character. A placeholder
    may itself contain separator characters, so ``"rpt_sales_monthly_revenue"``
    satisfies ``"rpt_{domain}_{name}"``.

    Args:
        item_name: The name to check.
        item_type: Item kind, singular (``"lakehouse"``) or already plural
            (``"lakehouses"``); conventions are keyed by the plural form.
        policy: Policy whose ``naming_conventions`` mapping is consulted.

    Returns:
        ``True`` when the name matches the convention, or when the policy
        defines no convention for this item type; ``False`` otherwise.
    """
    import re  # local import: not part of this module's top-level imports

    conventions = policy.naming_conventions
    # Try the pluralised key first, then the key exactly as given, so a
    # caller passing "lakehouses" is validated rather than silently accepted.
    pattern = conventions.get(item_type + "s") or conventions.get(item_type)
    if not pattern:
        return True  # No convention defined

    # Splitting on a capturing group keeps the placeholders in the result:
    # even indexes hold literal text, odd indexes hold "{placeholder}" tokens.
    pieces = re.split(r"(\{[^{}]*\})", pattern)
    name_regex = "".join(
        ".+" if index % 2 else re.escape(piece)
        for index, piece in enumerate(pieces)
    )
    return re.fullmatch(name_regex, item_name) is not None
Best Practices for Domain Design
- Align with business capabilities, not org structure
- Keep domains cohesive - related data stays together
- Define clear ownership - one team per domain
- Establish data contracts for cross-domain sharing
- Standardize within domains, federate across domains
# Domain design checklist: review items grouped by concern.
domain_checklist = {
    "alignment": [
        "Maps to business capability",
        "Clear bounded context",
        "Minimal dependencies",
    ],
    "ownership": [
        "Single accountable owner",
        "Dedicated data steward",
        "Clear escalation path",
    ],
    "data_products": [
        "Well-defined interfaces",
        "Documented schemas",
        "SLA commitments",
    ],
    "governance": [
        "Naming conventions",
        "Quality standards",
        "Security policies",
    ],
}
Tomorrow, we’ll explore Endorsement in Fabric - how to certify and promote trusted data products!