Fabric Migration Stories: Real-World Experiences
Learning from others’ experiences accelerates your own journey. Let’s explore real-world Fabric migration patterns and lessons learned.
Migration Patterns
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from enum import Enum


class SourcePlatform(Enum):
    AZURE_SYNAPSE = "Azure Synapse Analytics"
    DATABRICKS = "Databricks"
    SNOWFLAKE = "Snowflake"
    ON_PREM_SQL = "On-premises SQL Server"
    LEGACY_DW = "Legacy Data Warehouse"
    POWER_BI_PREMIUM = "Power BI Premium"
    AZURE_DATA_FACTORY = "Azure Data Factory"


class MigrationPattern(Enum):
    LIFT_AND_SHIFT = "Lift and Shift"
    MODERNIZE = "Modernize and Migrate"
    HYBRID = "Hybrid Coexistence"
    GREENFIELD = "Greenfield with Migration"


@dataclass
class MigrationScenario:
    source: SourcePlatform
    pattern: MigrationPattern
    complexity: str
    typical_duration: str
    key_considerations: List[str]
    migration_steps: List[str]
    common_pitfalls: List[str]
migration_scenarios = {
    SourcePlatform.AZURE_SYNAPSE: MigrationScenario(
        source=SourcePlatform.AZURE_SYNAPSE,
        pattern=MigrationPattern.MODERNIZE,
        complexity="Medium",
        typical_duration="3-6 months",
        key_considerations=[
            "T-SQL compatibility is high",
            "Dedicated SQL pools vs Fabric Warehouse",
            "Spark pools migrate to Fabric notebooks",
            "Pipelines need recreation or shortcuts"
        ],
        migration_steps=[
            "Inventory existing Synapse assets",
            "Create Fabric workspace structure",
            "Migrate data using shortcuts or copy",
            "Convert T-SQL scripts (minimal changes)",
            "Recreate Spark notebooks",
            "Migrate or recreate pipelines",
            "Validate and test",
            "Parallel run period",
            "Cutover"
        ],
        common_pitfalls=[
            "Underestimating pipeline complexity",
            "Missing Spark library dependencies",
            "Security model differences"
        ]
    ),
    SourcePlatform.DATABRICKS: MigrationScenario(
        source=SourcePlatform.DATABRICKS,
        pattern=MigrationPattern.HYBRID,
        complexity="Medium-High",
        typical_duration="4-8 months",
        key_considerations=[
            "Delta Lake compatibility (Fabric uses Delta)",
            "Notebook syntax differences",
            "Unity Catalog vs OneLake governance",
            "May keep Databricks for ML workloads"
        ],
        migration_steps=[
            "Assess Delta tables in existing lakehouse",
            "Set up OneLake shortcuts to existing storage",
            "Convert notebooks (PySpark largely compatible)",
            "Migrate or recreate jobs/workflows",
            "Establish new governance model",
            "Migrate incrementally by domain"
        ],
        common_pitfalls=[
            "Underestimating notebook conversion effort",
            "ML workflow disruption",
            "Cluster configuration differences"
        ]
    ),
    SourcePlatform.ON_PREM_SQL: MigrationScenario(
        source=SourcePlatform.ON_PREM_SQL,
        pattern=MigrationPattern.MODERNIZE,
        complexity="High",
        typical_duration="6-12 months",
        key_considerations=[
            "Network connectivity requirements",
            "Data transfer volumes",
            "T-SQL compatibility",
            "Reporting/BI migration",
            "Application dependencies"
        ],
        migration_steps=[
            "Database assessment and compatibility check",
            "Set up hybrid connectivity",
            "Initial data migration (bulk)",
            "Incremental sync setup",
            "Convert stored procedures",
            "Migrate reports to Power BI",
            "Application cutover planning",
            "Parallel run and validation",
            "Final cutover"
        ],
        common_pitfalls=[
            "Underestimating stored procedure complexity",
            "Network bandwidth constraints",
            "Application connection string changes"
        ]
    )
}
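To make the catalogue above actionable, a small helper can print a migration checklist for a chosen source platform. This is a minimal sketch that assumes only the enums and the migration_scenarios dictionary defined above; print_migration_checklist is an illustrative name, not part of any Fabric API.

# Minimal sketch: look up a scenario and print its migration checklist.
def print_migration_checklist(platform: SourcePlatform) -> None:
    scenario = migration_scenarios.get(platform)
    if scenario is None:
        print(f"No scenario documented for {platform.value}")
        return
    print(f"{platform.value} -> Fabric ({scenario.pattern.value})")
    print(f"Complexity: {scenario.complexity}, typical duration: {scenario.typical_duration}")
    for step_number, step in enumerate(scenario.migration_steps, start=1):
        print(f"  {step_number}. {step}")

print_migration_checklist(SourcePlatform.AZURE_SYNAPSE)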
Migration Story: Financial Services Company
@dataclass
class MigrationCaseStudy:
    company_type: str
    source_systems: List[str]
    target: str
    duration_months: int
    team_size: int
    challenges: List[str]
    solutions: List[str]
    outcomes: Dict[str, str]
    lessons_learned: List[str]


financial_services_case = MigrationCaseStudy(
    company_type="Financial Services (Insurance)",
    source_systems=[
        "Azure Synapse Analytics",
        "Azure Data Factory",
        "Power BI Premium",
        "On-premises SQL Server (staging)"
    ],
    target="Microsoft Fabric",
    duration_months=8,
    team_size=12,
    challenges=[
        "500+ pipelines to migrate",
        "Complex T-SQL stored procedures",
        "Regulatory compliance requirements",
        "24/7 operational requirements",
        "Training 50+ users"
    ],
    solutions=[
        "Phased migration by business domain",
        "Automated pipeline conversion scripts",
        "Parallel environments during transition",
        "Comprehensive testing framework",
        "Tiered training program"
    ],
    outcomes={
        "cost_reduction": "35% lower TCO",
        "performance": "60% faster report refresh",
        "user_adoption": "80% self-service adoption",
        "time_to_insight": "50% reduction"
    },
    lessons_learned=[
        "Start governance planning early",
        "Invest in automated testing",
        "Don't underestimate change management",
        "Pilot with enthusiastic users first",
        "Keep old system running longer than planned"
    ]
)
def generate_case_study_report(case: MigrationCaseStudy) -> str:
    """Generate a Markdown case study report."""
    return f"""
# Fabric Migration Case Study

## {case.company_type}

### Overview
- **Duration:** {case.duration_months} months
- **Team Size:** {case.team_size} people
- **Source Systems:** {', '.join(case.source_systems)}

### Challenges Faced
{chr(10).join(f'- {c}' for c in case.challenges)}

### Solutions Implemented
{chr(10).join(f'- {s}' for s in case.solutions)}

### Outcomes
| Metric | Result |
|--------|--------|
{chr(10).join(f'| {k.replace("_", " ").title()} | {v} |' for k, v in case.outcomes.items())}

### Key Lessons Learned
{chr(10).join(f'{i}. {lesson}' for i, lesson in enumerate(case.lessons_learned, 1))}

### Timeline
- **Phase 1 (Months 1-2):** Discovery and planning
- **Phase 2 (Months 3-4):** Pilot migration (Sales domain)
- **Phase 3 (Months 5-6):** Expanded migration (Claims, Underwriting)
- **Phase 4 (Months 7-8):** Final domains and optimization

### Recommendations for Others
1. Don't rush the discovery phase
2. Build strong partnership with Microsoft
3. Engage users early and often
4. Plan for longer coexistence than expected
5. Celebrate wins along the way
"""
Migration Tooling
from datetime import datetime  # needed for validation timestamps below


class MigrationToolkit:
    """Tools and utilities for Fabric migration."""

    def __init__(self):
        self.inventory = {}
        self.mapping = {}

    def inventory_synapse_assets(self, workspace_name: str) -> Dict:
        """Inventory Synapse assets for migration planning."""
        # In production, this would call the Synapse REST APIs
        inventory = {
            "workspace": workspace_name,
            "sql_pools": [],
            "spark_pools": [],
            "pipelines": [],
            "notebooks": [],
            "datasets": [],
            "linked_services": []
        }
        return {
            "inventory": inventory,
            "migration_effort": self._estimate_effort(inventory)
        }

    def _estimate_effort(self, inventory: Dict) -> Dict:
        """Estimate migration effort in person-days."""
        effort_per_type = {
            "sql_pools": 5,        # days per pool
            "spark_pools": 3,
            "pipelines": 0.5,      # per pipeline
            "notebooks": 0.25,     # per notebook
            "datasets": 0.1,
            "linked_services": 1
        }
        total_days = 0
        breakdown = {}
        for asset_type, items in inventory.items():
            if asset_type in effort_per_type and isinstance(items, list):
                days = len(items) * effort_per_type[asset_type]
                total_days += days
                breakdown[asset_type] = {"count": len(items), "days": days}
        return {
            "total_days": total_days,
            "breakdown": breakdown,
            "buffer_recommended": total_days * 0.3  # 30% contingency buffer
        }

    def generate_migration_script(
        self,
        source_type: str,
        source_details: Dict,
        target_workspace: str
    ) -> str:
        """Generate migration script template."""
        if source_type == "synapse_table":
            return self._generate_synapse_table_migration(source_details, target_workspace)
        elif source_type == "pipeline":
            return self._generate_pipeline_migration(source_details, target_workspace)
        else:
            return "# Migration script template not available for this source type"

    def _generate_synapse_table_migration(self, details: Dict, target: str) -> str:
        """Generate script for Synapse table migration."""
        return f"""
# Synapse to Fabric Table Migration Script

## Option 1: Using Shortcuts (Zero Copy)
# Create a shortcut in the Fabric Lakehouse pointing to the existing data
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Create shortcut (via Fabric UI or API)
# Path: abfss://{details.get('container')}@{details.get('storage')}.dfs.core.windows.net/{details.get('path')}

## Option 2: Copy Data
# Read from source
source_df = spark.read.format("delta").load(
    "abfss://{details.get('container')}@{details.get('storage')}.dfs.core.windows.net/{details.get('path')}"
)

# Write to Fabric Lakehouse
source_df.write.format("delta").mode("overwrite").saveAsTable("{details.get('table_name')}")

## Option 3: CTAS in Fabric Warehouse
-- Run in Fabric Warehouse
CREATE TABLE {target}.{details.get('schema')}.{details.get('table_name')}
AS SELECT * FROM [external_source].{details.get('schema')}.{details.get('table_name')}
"""

    def _generate_pipeline_migration(self, details: Dict, target: str) -> str:
        """Generate notes for pipeline migration."""
        return f"""
# Pipeline Migration Notes

## Pipeline: {details.get('name')}

### Activities to Migrate
# Review each activity type and convert it to the Fabric equivalent

### Key Conversions
- Copy Activity: Similar in Fabric, update source/sink connections
- Data Flow: Convert to Dataflow Gen2
- Notebook Activity: Update notebook path
- Stored Procedure: Update connection

### Steps
1. Export pipeline JSON from ADF/Synapse
2. Review and update connections
3. Create new pipeline in Fabric
4. Test with sample data
5. Validate output
"""

    def validate_migration(self, source: Dict, target: Dict) -> Dict:
        """Validate migration by comparing source and target."""
        validations = {
            "row_count": source.get("row_count") == target.get("row_count"),
            "schema_match": source.get("schema") == target.get("schema"),
            "checksum_match": source.get("checksum") == target.get("checksum"),
            "sample_data_match": True  # Would compare actual data samples
        }
        return {
            "all_passed": all(validations.values()),
            "validations": validations,
            "timestamp": datetime.now().isoformat()
        }
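Here is a hedged usage sketch that exercises the toolkit exactly as defined above. The workspace name, storage details, and source/target summaries are hypothetical placeholders; a real run would pull these values from the Synapse and Fabric APIs.

# Hypothetical end-to-end walk-through of the toolkit above.
toolkit = MigrationToolkit()

# 1. Inventory a (placeholder) Synapse workspace and review the effort estimate.
assessment = toolkit.inventory_synapse_assets("contoso-synapse-prod")  # hypothetical workspace
estimate = assessment["migration_effort"]
print(f"Estimated effort: {estimate['total_days']} days "
      f"(+{estimate['buffer_recommended']:.1f} days buffer)")

# 2. Generate a migration script template for one table (illustrative details only).
script = toolkit.generate_migration_script(
    source_type="synapse_table",
    source_details={
        "container": "raw",            # hypothetical container
        "storage": "contosodatalake",  # hypothetical storage account
        "path": "sales/orders",
        "schema": "dbo",
        "table_name": "orders",
    },
    target_workspace="fabric_sales_wh",
)
print(script)

# 3. Validate one migrated table using made-up row counts and checksums.
result = toolkit.validate_migration(
    source={"row_count": 1_000_000, "schema": "id:int,amount:decimal", "checksum": "abc123"},
    target={"row_count": 1_000_000, "schema": "id:int,amount:decimal", "checksum": "abc123"},
)
print("Validation passed:", result["all_passed"])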
Tomorrow, we’ll explore Fabric best practices for production deployments!