Skip to content
Back to Blog
1 min read

Data Governance in Microsoft Fabric: Purview Integration and Compliance

I wrote “Data Governance in Microsoft Fabric: Purview Integration and Compliance” to share practical, production-minded guidance on this topic.

Governance Challenges

Organizations struggle with data sprawl, unclear ownership, inconsistent definitions, and regulatory requirements. A unified governance approach addresses these challenges systematically.

Purview Integration

Connect Fabric workspaces to Purview for automated discovery and classification:

from azure.purview.catalog import PurviewCatalogClient
from azure.purview.scanning import PurviewScanningClient
from azure.identity import DefaultAzureCredential
from dataclasses import dataclass

@dataclass
class DataAsset:
    """Plain record grouping the governance metadata kept for one data asset.

    Nothing in the visible code constructs or reads this class, so the
    per-field notes below are inferred from the field names and from how the
    manager classes use similarly named arguments — confirm against callers.
    """
    # Presumably the catalog-unique qualified name, i.e. the same value the
    # managers receive as ``asset_qualified_name`` — TODO confirm.
    qualified_name: str
    # Short display name of the asset.
    name: str
    # Presumably the catalog type name of the asset — TODO confirm.
    asset_type: str
    # Classification type names attached to the asset.
    classifications: list[str]
    # Accountable owner; ``set_data_owner`` stores an e-mail address, so this
    # is presumably an e-mail too — verify.
    owner: str
    # Business glossary terms linked to the asset (names or GUIDs is not
    # determinable from here).
    glossary_terms: list[str]

class FabricGovernanceManager:
    """Register Fabric lakehouses in Purview and govern the resulting assets.

    Wraps a ``PurviewScanningClient`` (data sources, scans, triggers) and a
    ``PurviewCatalogClient`` (entities, classifications) for one Purview
    account.

    NOTE(review): the ``"FabricLakehouse"`` / ``"FabricLakehouseMsi"`` kinds,
    the ``"FabricLakehouse"`` scan ruleset name and the workspace endpoint
    format are kept exactly as originally written; confirm them against the
    source types your Purview account actually supports.
    """

    # Weekday names in cron order (0 = Sunday). Cron also allows 7 for Sunday.
    _WEEKDAYS = (
        "Sunday",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
    )

    def __init__(self, purview_account: str, collection_name: str = "Fabric-Data"):
        """Create the catalog and scanning clients for ``purview_account``.

        Args:
            purview_account: Purview account name (the host-name prefix).
            collection_name: Purview collection that registered sources and
                scans are placed in. Defaults to the previously hard-coded
                ``"Fabric-Data"``.
        """
        credential = DefaultAzureCredential()
        self.collection_name = collection_name
        self.catalog_client = PurviewCatalogClient(
            endpoint=f"https://{purview_account}.purview.azure.com",
            credential=credential,
        )
        # The scanning data plane lives on its own ``.scan.`` host; pointing
        # it at the catalog host makes every scanning call fail.
        self.scanning_client = PurviewScanningClient(
            endpoint=f"https://{purview_account}.scan.purview.azure.com",
            credential=credential,
        )

    def _collection_reference(self) -> dict:
        """Return the collection reference embedded in source/scan payloads."""
        return {
            "referenceName": self.collection_name,
            "type": "CollectionReference",
        }

    @staticmethod
    def _expand_cron_field(field: str, low: int, high: int, label: str) -> list[int]:
        """Expand a cron field made of integers, ``a-b`` ranges and commas.

        Raises:
            ValueError: for wildcards, steps, non-numeric text or values
                outside ``low``..``high``.
        """
        values: list[int] = []
        for part in field.split(","):
            start_text, dash, end_text = part.partition("-")
            try:
                start = int(start_text)
                end = int(end_text) if dash else start
            except ValueError:
                raise ValueError(
                    f"Unsupported cron {label} field: {field!r}"
                ) from None
            if not low <= start <= end <= high:
                raise ValueError(
                    f"Cron {label} field must be within {low}-{high}: {field!r}"
                )
            for value in range(start, end + 1):
                if value not in values:
                    values.append(value)
        return values

    @classmethod
    def _cron_to_recurrence(cls, schedule_cron: str) -> dict:
        """Translate a five-field cron expression into a trigger recurrence.

        Supported shapes are ``M H * * DOW`` (weekly), ``M H DOM * *``
        (monthly) and ``M H * * *`` (every day, expressed as a weekly
        recurrence on all seven days). Minute and hour must be explicit.

        Raises:
            ValueError: if the expression cannot be represented.
        """
        fields = schedule_cron.split()
        if len(fields) != 5:
            raise ValueError(
                f"Expected a 5-field cron expression, got: {schedule_cron!r}"
            )
        minute, hour, day_of_month, month, day_of_week = fields
        if month != "*":
            raise ValueError(
                f"Month restrictions are not supported: {schedule_cron!r}"
            )
        if day_of_month != "*" and day_of_week != "*":
            raise ValueError(
                "Day-of-month and day-of-week cannot both be restricted: "
                f"{schedule_cron!r}"
            )

        schedule = {
            "hours": cls._expand_cron_field(hour, 0, 23, "hour"),
            "minutes": cls._expand_cron_field(minute, 0, 59, "minute"),
        }
        if day_of_month != "*":
            frequency = "Month"
            schedule["monthDays"] = cls._expand_cron_field(
                day_of_month, 1, 31, "day-of-month"
            )
        else:
            frequency = "Week"
            if day_of_week == "*":
                week_days = list(cls._WEEKDAYS)
            else:
                week_days = []
                for number in cls._expand_cron_field(
                    day_of_week, 0, 7, "day-of-week"
                ):
                    day_name = cls._WEEKDAYS[number % 7]  # 7 wraps to Sunday
                    if day_name not in week_days:
                        week_days.append(day_name)
            schedule["weekDays"] = week_days

        return {"frequency": frequency, "interval": 1, "schedule": schedule}

    def _get_asset_guid(
        self,
        asset_qualified_name: str,
        type_name: str = "DataSet"
    ) -> str:
        """Look up an entity by qualified name and return its GUID."""
        found = self.catalog_client.entity.get_by_unique_attributes(
            type_name=type_name,
            attr_qualified_name=asset_qualified_name
        )
        return found["entity"]["guid"]

    def register_fabric_source(
        self,
        workspace_name: str,
        lakehouse_name: str
    ) -> dict:
        """Register Fabric lakehouse as a data source.

        Args:
            workspace_name: Fabric workspace; used in the endpoint URL and in
                the generated data source name.
            lakehouse_name: Lakehouse inside that workspace.

        Returns:
            Whatever ``data_sources.create_or_update`` returns for the source
            named ``fabric-<workspace_name>-<lakehouse_name>``.
        """
        source_definition = {
            "kind": "FabricLakehouse",
            "properties": {
                "endpoint": f"https://{workspace_name}.fabric.microsoft.com",
                "lakehouse": lakehouse_name,
                "collection": self._collection_reference()
            }
        }

        return self.scanning_client.data_sources.create_or_update(
            data_source_name=f"fabric-{workspace_name}-{lakehouse_name}",
            body=source_definition
        )

    def setup_scan_schedule(
        self,
        data_source_name: str,
        scan_name: str,
        schedule_cron: str = "0 0 * * 0"  # Weekly
    ) -> None:
        """Configure automated scanning for data discovery.

        Creates (or updates) the scan and then attaches an incremental-scan
        trigger whose recurrence is derived from ``schedule_cron``.

        Args:
            data_source_name: Name of an already registered data source.
            scan_name: Name of the scan to create under that source.
            schedule_cron: Five-field cron expression; the default runs
                Sundays at 00:00. See ``_cron_to_recurrence`` for the
                supported subset.

        Raises:
            ValueError: if ``schedule_cron`` cannot be represented.
        """
        # Validate first so a bad expression cannot leave a scan behind
        # without its trigger.
        recurrence = self._cron_to_recurrence(schedule_cron)

        scan_definition = {
            "kind": "FabricLakehouseMsi",
            "properties": {
                "scanRulesetName": "FabricLakehouse",
                "collection": self._collection_reference()
            }
        }

        self.scanning_client.scans.create_or_update(
            data_source_name=data_source_name,
            scan_name=scan_name,
            body=scan_definition
        )

        trigger = {
            "properties": {
                "scanLevel": "Incremental",
                "recurrence": recurrence
            }
        }

        # A scan has a single trigger, so the SDK call takes no trigger name.
        self.scanning_client.triggers.create_trigger(
            data_source_name=data_source_name,
            scan_name=scan_name,
            body=trigger
        )

    def apply_classification(
        self,
        asset_qualified_name: str,
        classifications: list[str],
        type_name: str = "DataSet"
    ) -> None:
        """Apply data classifications to assets.

        Args:
            asset_qualified_name: Qualified name of the target entity.
            classifications: Classification type names to attach. An empty
                list is a no-op and makes no service call.
            type_name: Catalog type used for the lookup.
        """
        if not classifications:
            return

        asset_guid = self._get_asset_guid(asset_qualified_name, type_name)

        # The endpoint takes the bare list of classification objects.
        self.catalog_client.entity.add_classifications(
            asset_guid,
            [{"typeName": c} for c in classifications]
        )

    def set_data_owner(
        self,
        asset_qualified_name: str,
        owner_email: str,
        type_name: str = "DataSet"
    ) -> None:
        """Assign data ownership for accountability.

        Fetches the entity, sets its ``owner`` attribute to ``owner_email``
        and writes the whole entity back.

        Args:
            asset_qualified_name: Qualified name of the target entity.
            owner_email: Value stored in the entity's ``owner`` attribute.
            type_name: Catalog type used for the lookup.
        """
        entity = self.catalog_client.entity.get_by_unique_attributes(
            type_name=type_name,
            attr_qualified_name=asset_qualified_name
        )

        entity["entity"]["attributes"]["owner"] = owner_email

        self.catalog_client.entity.create_or_update(
            entity=entity
        )

Business Glossary

Define consistent terminology across the organization:

class GlossaryManager:
    """Create business glossary terms and attach them to catalog assets.

    NOTE(review): the "default" glossary is taken to be the first glossary
    the service lists, and the term lookup relies on the service's default
    page size — confirm both for accounts with several glossaries or very
    large glossaries.
    """

    def __init__(self, catalog_client: "PurviewCatalogClient"):
        """Store the catalog client used for every glossary/entity call."""
        self.client = catalog_client
        # Resolved on first use, then reused to avoid repeated list calls.
        self._default_glossary_guid = None

    def _get_default_glossary_guid(self) -> str:
        """Return the GUID of the first glossary listed, caching the result.

        Raises:
            LookupError: if the service lists no glossary.
        """
        cached = getattr(self, "_default_glossary_guid", None)
        if cached is not None:
            return cached

        glossaries = self.client.glossary.list_glossaries()
        if not glossaries:
            raise LookupError("No glossary exists in this Purview account")

        self._default_glossary_guid = glossaries[0]["guid"]
        return self._default_glossary_guid

    def _get_or_create_term_guid(self, term_name: str) -> str:
        """Return the GUID of the named term, creating a bare term if absent."""
        glossary_guid = self._get_default_glossary_guid()

        for existing in self.client.glossary.list_glossary_terms(glossary_guid):
            if existing.get("name") == term_name:
                return existing["guid"]

        created = self.client.glossary.create_glossary_term(
            {"name": term_name, "anchor": {"glossaryGuid": glossary_guid}}
        )
        return created["guid"]

    def _get_asset_guid(
        self,
        asset_qualified_name: str,
        type_name: str = "DataSet"
    ) -> str:
        """Look up an entity by qualified name and return its GUID."""
        found = self.client.entity.get_by_unique_attributes(
            type_name=type_name,
            attr_qualified_name=asset_qualified_name
        )
        return found["entity"]["guid"]

    def create_term(
        self,
        name: str,
        definition: str,
        category: str,
        synonyms: list[str] = None,
        related_terms: list[str] = None
    ) -> dict:
        """Create a business glossary term.

        Args:
            name: Term name.
            definition: Stored as the term's ``longDescription``.
            category: Stored as the URL of a resource labelled "Category"
                (kept as originally written).
            synonyms: Optional names of synonym terms; each is resolved to an
                existing term or created as a bare term first.
            related_terms: Optional names of related terms, resolved the same
                way and sent as the term's ``seeAlso`` links.

        Returns:
            Whatever ``glossary.create_glossary_term`` returns.

        Raises:
            LookupError: if the account has no glossary to anchor the term.
        """
        term = {
            "name": name,
            "qualifiedName": f"glossary@{name}",
            "longDescription": definition,
            "anchor": {
                "glossaryGuid": self._get_default_glossary_guid()
            },
            "resources": [
                {"displayName": "Category", "url": category}
            ]
        }

        if synonyms:
            term["synonyms"] = [
                {"termGuid": self._get_or_create_term_guid(s)}
                for s in synonyms
            ]

        # Previously accepted but silently dropped.
        if related_terms:
            term["seeAlso"] = [
                {"termGuid": self._get_or_create_term_guid(r)}
                for r in related_terms
            ]

        # Passed positionally: the SDK does not name this parameter ``body``.
        return self.client.glossary.create_glossary_term(term)

    def link_term_to_asset(
        self,
        term_guid: str,
        asset_qualified_name: str
    ) -> None:
        """Associate glossary term with data asset.

        Args:
            term_guid: GUID of an existing glossary term.
            asset_qualified_name: Qualified name of the entity to tag.
        """
        asset_guid = self._get_asset_guid(asset_qualified_name)

        self.client.glossary.assign_term_to_entities(
            term_guid,
            [{"guid": asset_guid}]
        )

Data governance with Fabric and Purview creates a foundation for trusted analytics. Automated discovery, consistent definitions, and clear ownership enable self-service while maintaining control.

## Takeaways

Start small: register a single lakehouse as a Purview data source, schedule a recurring scan, and then layer classifications, named owners, and glossary terms on top of what the scan discovers.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.