Back to Blog
5 min read

Platform Engineering Trends: Building Internal Data Platforms

Platform Engineering Trends: Building Internal Data Platforms

Platform engineering is transforming how organizations deliver data capabilities. Let’s explore this trend and its implications for data teams.

What is Platform Engineering?

from dataclasses import dataclass
from typing import List, Dict

@dataclass
class PlatformEngineeringDefinition:
    """Plain record describing what platform engineering is.

    Holds one free-text concept statement, two lists of short bullet
    strings (goals and principles) and one free-text note on how the
    discipline relates to DevOps. The class adds no behavior beyond what
    ``@dataclass`` generates.
    """

    # Free-text statement of the core idea.
    core_concept: str
    # Short bullet strings, one per goal.
    goals: List[str]
    # Short bullet strings, one per guiding principle.
    key_principles: List[str]
    # Free-text note on the relationship to DevOps.
    relationship_to_devops: str

# Module-level instance of PlatformEngineeringDefinition holding the article's
# description of platform engineering.
platform_engineering_overview = PlatformEngineeringDefinition(
    # The triple-quoted text starts on the line after the opening quotes, so
    # the stored string begins and ends with a newline character.
    core_concept="""
Platform engineering is the discipline of designing and building
self-service capabilities that enable development teams to
independently create and deliver applications.
""",
    # Four goals, one short string each.
    goals=[
        "Reduce cognitive load on developers",
        "Standardize and automate common tasks",
        "Enable self-service with guardrails",
        "Improve developer experience and productivity"
    ],
    # Four principles, one short string each.
    key_principles=[
        "Platform as a product (with customers)",
        "Self-service with guardrails",
        "Golden paths, not mandates",
        "Internal developer experience matters"
    ],
    # Same leading/trailing newline layout as core_concept.
    relationship_to_devops="""
Platform engineering builds ON DevOps principles but addresses
the challenge of scaling DevOps practices across large organizations.
Instead of each team implementing DevOps, a platform team provides
capabilities that other teams consume.
"""
)

Data Platform Engineering

# Describes data platform engineering: a free-text definition plus the
# capability areas a platform team provides. Each capability area maps to a
# one-line description and a list of example offerings.
data_platform_engineering = {
    "definition": """
Data platform engineering applies platform engineering principles
to data infrastructure, enabling data teams to self-serve while
maintaining governance, quality, and efficiency.
""",
    "capabilities_to_provide": {
        area: {"description": summary, "examples": offerings}
        # One row per capability area: (key, description, examples).
        for area, summary, offerings in (
            (
                "data_ingestion",
                "Standardized patterns for bringing data in",
                [
                    "Pre-built connectors",
                    "Self-service ingestion wizard",
                    "Automated schema detection",
                    "Built-in monitoring",
                ],
            ),
            (
                "data_transformation",
                "Tools and patterns for transforming data",
                [
                    "Template notebooks",
                    "dbt project templates",
                    "Validated transformation patterns",
                    "Testing frameworks",
                ],
            ),
            (
                "data_serving",
                "Ways to expose data to consumers",
                [
                    "Semantic model templates",
                    "API generation",
                    "Self-service BI enablement",
                    "Data product catalogs",
                ],
            ),
            (
                "governance",
                "Automated guardrails and compliance",
                [
                    "Automated data classification",
                    "Policy enforcement",
                    "Lineage tracking",
                    "Access control automation",
                ],
            ),
        )
    },
}

Building a Data Platform Product

@dataclass
class InternalDataProduct:
    """Plain record describing one internal data product.

    All fields are required and have no defaults. The class adds no
    behavior beyond what ``@dataclass`` generates.
    NOTE(review): nothing in the visible code instantiates this class.
    """

    # Product name.
    name: str
    # Free-text description of the product.
    description: str
    # Strings identifying the product's users.
    users: List[str]
    # Strings naming the capabilities the product offers.
    capabilities: List[str]
    # Strings naming the metrics used to judge success.
    success_metrics: List[str]

class DataPlatformAsProduct:
    """Model an internal data platform that is managed like a product.

    An instance stores the platform's name and starts with empty
    ``customers``, ``capabilities`` and ``feedback`` lists. The public
    methods return fixed planning content: a vision document, a quarterly
    roadmap and a set of golden paths. Only the vision text depends on
    instance state, through ``platform_name``.
    """

    def __init__(self, platform_name: str):
        """Record the platform name and create the three empty lists."""
        self.platform_name = platform_name
        self.customers, self.capabilities, self.feedback = [], [], []

    def define_platform_vision(self) -> str:
        """Return the vision statement as Markdown text.

        The heading carries ``platform_name``; the rest is fixed. The text
        begins and ends with a newline character.
        """
        vision_document = f"""
# {self.platform_name} Platform Vision

## Mission
Enable every team to independently create, manage, and share
data products with confidence and speed.

## Core Values
1. **Self-Service First** - Teams can do it themselves
2. **Guardrails, Not Gates** - Enable with protection
3. **Developer Experience Matters** - Frictionless interfaces
4. **Platform as Product** - Continuous improvement

## Success Metrics
- Time to first data product
- Self-service resolution rate
- Developer satisfaction (NPS)
- Platform adoption rate
"""
        return vision_document

    def create_capability_roadmap(self) -> Dict:
        """Return the 2024 roadmap keyed by quarter.

        Each quarter maps to a dict with a ``theme`` string and a
        ``capabilities`` list. A new structure is built on every call.
        """
        # One row per quarter: (key, theme, planned capabilities).
        quarterly_plan = (
            (
                "q1_2024",
                "Foundation",
                (
                    "Self-service workspace provisioning",
                    "Template repository",
                    "Basic monitoring dashboard",
                ),
            ),
            (
                "q2_2024",
                "Self-Service Data",
                (
                    "Data ingestion wizard",
                    "Schema registry",
                    "Data quality framework",
                ),
            ),
            (
                "q3_2024",
                "Governance Automation",
                (
                    "Automated classification",
                    "Policy as code",
                    "Compliance reporting",
                ),
            ),
            (
                "q4_2024",
                "Advanced Analytics",
                (
                    "ML platform integration",
                    "Feature store",
                    "Advanced observability",
                ),
            ),
        )
        return {
            quarter: {"theme": theme, "capabilities": list(planned)}
            for quarter, theme, planned in quarterly_plan
        }

    def define_golden_paths(self) -> List[Dict]:
        """Return the golden paths for two common scenarios.

        Each path is a dict with ``name``, ``description``, numbered
        ``steps`` and a ``time_estimate`` string.
        """
        new_data_product = {
            "name": "New Data Product",
            "description": "Standard path for creating a data product",
            "steps": [
                "1. Request workspace (automated provisioning)",
                "2. Use template repository to bootstrap",
                "3. Configure ingestion using standard connectors",
                "4. Apply transformation templates",
                "5. Register in data catalog",
                "6. Set up monitoring",
            ],
            "time_estimate": "Days, not weeks",
        }
        analytics_dashboard = {
            "name": "Analytics Dashboard",
            "description": "Standard path for creating BI dashboards",
            "steps": [
                "1. Connect to certified data products",
                "2. Use semantic model templates",
                "3. Apply standard visualizations",
                "4. Automated testing and deployment",
            ],
            "time_estimate": "Hours, not days",
        }
        return [new_data_product, analytics_dashboard]

Tools and Technologies

# Tools relevant to platform engineering, grouped by category. Developer
# portals carry a description, use case and adoption note; every other
# category maps a tool name to a one-line description.
platform_engineering_tools = dict(
    developer_portals=dict(
        backstage=dict(
            description="Spotify's open source developer portal",
            use_case="Central catalog and documentation",
            adoption="Growing rapidly",
        ),
        port=dict(
            description="Commercial internal developer portal",
            use_case="Self-service platform interface",
            adoption="Enterprise focused",
        ),
    ),
    infrastructure_as_code=dict(
        terraform="Multi-cloud infrastructure provisioning",
        pulumi="Programming language IaC",
        bicep="Azure-native IaC",
    ),
    gitops=dict(
        argocd="Kubernetes GitOps",
        flux="Kubernetes GitOps",
        github_actions="CI/CD automation",
    ),
    data_specific=dict(
        dbt="Data transformation orchestration",
        great_expectations="Data quality framework",
        monte_carlo="Data observability",
        atlan="Data catalog and governance",
    ),
)

Measuring Platform Success

# Metrics for judging the platform, grouped into four categories. Each
# category maps to a list of metric names.
platform_metrics = dict(
    adoption_metrics=[
        "% of teams using platform",
        "Number of data products created",
        "Self-service vs. assisted requests",
    ],
    efficiency_metrics=[
        "Time to provision new workspace",
        "Time to first data product",
        "Pipeline development time",
    ],
    quality_metrics=[
        "Data quality scores",
        "Incidents and MTTR",
        "Compliance rate",
    ],
    satisfaction_metrics=[
        "Developer NPS",
        "Support ticket volume",
        "Feature request trends",
    ],
)

def calculate_platform_roi(metrics: Dict) -> Dict:
    """Estimate the platform's yearly return on investment.

    Annual savings are modelled as hours saved per project, times projects
    per year, times the average hourly rate. They are then compared with
    the platform's annual cost.

    Args:
        metrics: Mapping that may contain ``hours_saved_per_project``
            (default 100), ``projects_per_year`` (default 50),
            ``avg_hourly_rate`` (default 100) and ``platform_annual_cost``
            (default 500000). Missing keys use the defaults; ``None`` is
            treated as an empty mapping.

    Returns:
        A dict with ``annual_savings``, ``platform_cost``, ``net_benefit``
        and ``roi_percent``. ``roi_percent`` is ``None`` when the platform
        cost is zero, because the ratio is undefined in that case.
    """
    # Accept None so callers without measurements still get the defaults.
    metrics = metrics or {}

    time_saved_hours = metrics.get("hours_saved_per_project", 100)
    projects_per_year = metrics.get("projects_per_year", 50)
    hourly_rate = metrics.get("avg_hourly_rate", 100)
    platform_cost = metrics.get("platform_annual_cost", 500000)

    annual_savings = time_saved_hours * projects_per_year * hourly_rate
    net_benefit = annual_savings - platform_cost

    # A zero cost would divide by zero; report the ratio as undefined.
    if platform_cost == 0:
        roi_percent = None
    else:
        roi_percent = (net_benefit / platform_cost) * 100

    return {
        "annual_savings": annual_savings,
        "platform_cost": platform_cost,
        "net_benefit": net_benefit,
        "roi_percent": roi_percent,
    }

Tomorrow, we’ll wrap up 2023 with a summary and key takeaways!

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.