Power BI Premium Capacities: Scaling Enterprise Analytics
Power BI Premium capacities provide dedicated resources for enterprise analytics. Microsoft has continued to expand Premium with additions such as Premium Per User (PPU) and enhanced capacity management features.
Understanding Premium Capacities
Premium capacities offer:
- Dedicated compute: No sharing with other tenants
- Larger datasets: Up to 400 GB per dataset (on the largest SKUs, using the large dataset storage format)
- Paginated reports: Enterprise reporting
- Dataflows: Self-service data prep
- AI features: AutoML, cognitive services
- Deployment pipelines: Dev/test/prod workflows
Premium SKU Options
SKU      V-Cores   Memory    Max Dataset Size   Price/Month (approx)
---------------------------------------------------------------------
EM1/A1   1         3 GB      3 GB               $735
EM2/A2   2         5 GB      5 GB               $1,470
EM3/A3   4         10 GB     10 GB              $2,940
P1/A4    8         25 GB     25 GB              $4,995
P2/A5    16        50 GB     50 GB              $9,990
P3/A6    32        100 GB    100 GB             $19,980
P4/A7    64        200 GB    200 GB             Contact Microsoft
P5/A8    128       400 GB    400 GB             Contact Microsoft
Premium Per User (PPU):
- Cost: ~$20/user/month
- Nearly all Premium capacity features (content can only be consumed by other PPU users)
- 100 GB dataset limit
- Great for smaller teams; see the cost comparison sketch below
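A quick way to sanity-check PPU against a dedicated capacity is to compare the per-user licensing cost with the capacity's monthly price. A rough back-of-the-envelope sketch using the approximate list prices above (it ignores the Pro licenses that report authors on a P SKU still need):
# Rough break-even between PPU licensing and a dedicated P1 capacity
ppu_per_user = 20      # USD per user per month (approximate list price)
p1_monthly = 4995      # USD per month (approximate list price)

break_even_users = p1_monthly / ppu_per_user
print(f"PPU costs more than a P1 above roughly {break_even_users:.0f} users")  # ~250 users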
Capacity Management with REST API
Monitor and manage capacities programmatically:
import requests
import msal

# Authentication (service principal / app-only)
app = msal.ConfidentialClientApplication(
    client_id="your-client-id",
    client_credential="your-client-secret",
    authority="https://login.microsoftonline.com/your-tenant-id"
)
token = app.acquire_token_for_client(
    scopes=["https://analysis.windows.net/powerbi/api/.default"]
)
headers = {
    "Authorization": f"Bearer {token['access_token']}",
    "Content-Type": "application/json"
}

# Get capacity details
def get_capacities():
    response = requests.get(
        "https://api.powerbi.com/v1.0/myorg/capacities",
        headers=headers
    )
    response.raise_for_status()
    return response.json()["value"]

# Get capacity workloads
def get_capacity_workloads(capacity_id):
    response = requests.get(
        f"https://api.powerbi.com/v1.0/myorg/capacities/{capacity_id}/Workloads",
        headers=headers
    )
    response.raise_for_status()
    return response.json()["value"]

# Trigger a dataset refresh (refreshes run per dataset, not per capacity)
def refresh_dataset(dataset_id):
    response = requests.post(
        f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes",
        headers=headers
    )
    return response.status_code == 202  # Accepted

capacities = get_capacities()
for cap in capacities:
    print(f"Capacity: {cap['displayName']}")
    print(f"  SKU: {cap['sku']}")
    print(f"  State: {cap['state']}")
    print(f"  Region: {cap['region']}")
Configuring Workloads
Optimize capacity for your workload types:
def update_workload_settings(capacity_id, workload_name, settings):
    """
    Update workload settings for a capacity.
    workload_name: 'Dataflows', 'PaginatedReports', or 'AI'
    settings example: {'state': 'Enabled', 'maxMemoryPercentageSetByUser': 50}
    """
    response = requests.patch(
        f"https://api.powerbi.com/v1.0/myorg/capacities/{capacity_id}/Workloads/{workload_name}",
        headers=headers,
        json=settings
    )
    return response.status_code == 200

# Enable the AI workload with a 20% memory cap
update_workload_settings(
    capacity_id="your-capacity-id",
    workload_name="AI",
    settings={
        "state": "Enabled",
        "maxMemoryPercentageSetByUser": 20
    }
)

# Configure paginated reports
update_workload_settings(
    capacity_id="your-capacity-id",
    workload_name="PaginatedReports",
    settings={
        "state": "Enabled",
        "maxMemoryPercentageSetByUser": 30
    }
)

# Configure dataflows
update_workload_settings(
    capacity_id="your-capacity-id",
    workload_name="Dataflows",
    settings={
        "state": "Enabled",
        "maxMemoryPercentageSetByUser": 40
    }
)
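After changing workload settings, it is worth reading them back to confirm the capacity accepted the values. This reuses the get_capacity_workloads helper defined earlier (the capacity ID is a placeholder):
# Verify the workload configuration took effect
for workload in get_capacity_workloads("your-capacity-id"):
    print(
        f"{workload['name']}: {workload['state']} "
        f"(max memory: {workload.get('maxMemoryPercentageSetByUser', 'n/a')}%)"
    )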
Monitoring Capacity Health
The Refreshables API returns per-dataset refresh statistics for a capacity, which makes it easy to spot slow or failing refreshes:
import pandas as pd

def get_capacity_refreshables(capacity_id, top=100):
    """Get refresh statistics for datasets hosted on the capacity."""
    response = requests.get(
        f"https://api.powerbi.com/v1.0/myorg/capacities/{capacity_id}/Refreshables",
        headers=headers,
        params={"$top": top}
    )
    response.raise_for_status()
    return response.json()["value"]

def analyze_refresh_performance(capacity_id):
    """Analyze dataset refresh performance."""
    refreshables = get_capacity_refreshables(capacity_id)
    df = pd.DataFrame(refreshables)

    # averageDuration is reported in seconds
    df['averageDurationSec'] = df['averageDuration'].fillna(0)

    # Identify slow refreshes (more than 5 minutes on average)
    slow_refreshes = df[df['averageDurationSec'] > 300]

    return {
        'total_datasets': len(df),
        'slow_datasets': len(slow_refreshes),
        'average_duration_sec': df['averageDurationSec'].mean(),
        'slow_dataset_names': slow_refreshes['name'].tolist()
    }

# Usage
performance = analyze_refresh_performance("your-capacity-id")
print(f"Total datasets: {performance['total_datasets']}")
print(f"Slow datasets: {performance['slow_datasets']}")
print(f"Average duration: {performance['average_duration_sec']:.2f} seconds")
Auto-Scale Configuration
Azure-based (A SKU) capacities can be scaled programmatically. The Bicep template below provisions a capacity and an Azure Automation account that hosts a scaling runbook:
param location string = resourceGroup().location
param capacityName string
param adminEmail string

resource powerBICapacity 'Microsoft.PowerBIDedicated/capacities@2021-01-01' = {
  name: capacityName
  location: location
  sku: {
    name: 'A4'
    tier: 'PBIE_Azure'
  }
  properties: {
    administration: {
      members: [adminEmail]
    }
    mode: 'Gen2'
  }
}

// Auto-scale settings using Azure Automation
resource automationAccount 'Microsoft.Automation/automationAccounts@2021-06-22' = {
  name: '${capacityName}-automation'
  location: location
  properties: {
    sku: {
      name: 'Basic'
    }
  }
}

resource scaleUpRunbook 'Microsoft.Automation/automationAccounts/runbooks@2019-06-01' = {
  parent: automationAccount
  name: 'ScaleUpCapacity'
  location: location
  properties: {
    runbookType: 'PowerShell'
    publishContentLink: {
      uri: 'https://raw.githubusercontent.com/.../scale-up.ps1'
    }
  }
}
PowerShell runbook for scaling:
# scale-capacity.ps1
param(
    [string]$ResourceGroupName,
    [string]$CapacityName,
    [string]$TargetSku
)

# Authenticate with the Automation account's managed identity
Connect-AzAccount -Identity

# Get current capacity
$capacity = Get-AzPowerBIEmbeddedCapacity `
    -ResourceGroupName $ResourceGroupName `
    -Name $CapacityName

Write-Output "Current SKU: $($capacity.Sku)"

# Scale the capacity
if ($capacity.Sku -ne $TargetSku) {
    Update-AzPowerBIEmbeddedCapacity `
        -ResourceGroupName $ResourceGroupName `
        -Name $CapacityName `
        -Sku $TargetSku
    Write-Output "Scaled to: $TargetSku"
} else {
    Write-Output "Already at target SKU"
}
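If you prefer not to run Azure Automation, the same scale operation can be issued directly against Azure Resource Manager from Python. A minimal sketch, assuming a valid ARM bearer token and placeholder subscription, resource group, and capacity names:
def scale_capacity(subscription_id, resource_group, capacity_name,
                   target_sku, arm_token):
    """Change the SKU of an Azure (A SKU) capacity via ARM."""
    url = (
        f"https://management.azure.com/subscriptions/{subscription_id}"
        f"/resourceGroups/{resource_group}"
        f"/providers/Microsoft.PowerBIDedicated/capacities/{capacity_name}"
        f"?api-version=2021-01-01"
    )
    body = {"sku": {"name": target_sku, "tier": "PBIE_Azure"}}
    response = requests.patch(
        url,
        headers={"Authorization": f"Bearer {arm_token}",
                 "Content-Type": "application/json"},
        json=body
    )
    return response.status_code in (200, 202)

# Example (hypothetical values): scale up before the morning refresh window
# scale_capacity("sub-id", "rg-analytics", "mycapacity", "A5", arm_token)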
Dataset Size Optimization
Large model best practices:
// Use calculated columns sparingly - prefer measures

// Bad: Calculated column
Total Amount = Sales[Quantity] * Sales[Unit Price]

// Good: Measure (calculated at query time)
Total Amount := SUMX(Sales, Sales[Quantity] * Sales[Unit Price])

// Use aggregations for large tables
// Create aggregation table in Power Query
let
    Source = Sales,
    Grouped = Table.Group(
        Source,
        {"Date", "Product", "Region"},
        {
            {"TotalQuantity", each List.Sum([Quantity]), type number},
            {"TotalAmount", each List.Sum([Amount]), type number},
            {"RowCount", each Table.RowCount(_), type number}
        }
    )
in
    Grouped
Then map each aggregation column to its detail table and column. In Power BI Desktop this is done through the Manage aggregations dialog; a simplified view of the resulting mapping:
{
  "model": {
    "tables": [
      {
        "name": "Sales_Agg",
        "aggregations": [
          {
            "column": "TotalAmount",
            "expression": "SUM",
            "detailTable": "Sales",
            "detailColumn": "Amount"
          },
          {
            "column": "TotalQuantity",
            "expression": "SUM",
            "detailTable": "Sales",
            "detailColumn": "Quantity"
          }
        ]
      }
    ]
  }
}
Incremental Refresh
Configure incremental refresh for large datasets:
// In Power Query, create parameters
RangeStart = #datetime(2020, 1, 1, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]
RangeEnd = #datetime(2020, 12, 31, 23, 59, 59) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]
// Filter source data using parameters
let
    Source = Sql.Database("server", "database"),
    Sales = Source{[Schema="dbo",Item="Sales"]}[Data],
    FilteredRows = Table.SelectRows(
        Sales,
        each [OrderDate] >= RangeStart and [OrderDate] < RangeEnd
    )
in
    FilteredRows
Configure the policy in Power BI Desktop, then publish to Premium. The stored refresh policy looks roughly like this (simplified):
{
  "incrementalRefresh": {
    "refreshPolicy": {
      "rollingWindowGranularity": "month",
      "rollingWindowPeriods": 24,
      "incrementalGranularity": "day",
      "incrementalPeriods": 3,
      "pollingExpression": "SELECT MAX(ModifiedDate) FROM Sales",
      "detectDataChanges": true
    }
  }
}
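Once the dataset is published, scheduled and API-driven refreshes apply the policy automatically. The enhanced refresh REST API also lets you control this explicitly; a sketch assuming placeholder workspace and dataset IDs:
def refresh_with_policy(group_id, dataset_id, apply_policy=True):
    """Trigger an enhanced refresh that honours (or bypasses) the incremental refresh policy."""
    body = {
        "type": "full",
        "commitMode": "transactional",
        "applyRefreshPolicy": apply_policy,
        "maxParallelism": 2,
        "retryCount": 2
    }
    response = requests.post(
        f"https://api.powerbi.com/v1.0/myorg/groups/{group_id}/datasets/{dataset_id}/refreshes",
        headers=headers,
        json=body
    )
    return response.status_code == 202  # refresh accepted and queued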
Deployment Pipelines
Automate content deployment across environments:
def get_deployment_pipelines():
    response = requests.get(
        "https://api.powerbi.com/v1.0/myorg/pipelines",
        headers=headers
    )
    return response.json()["value"]

def deploy_content(pipeline_id, source_stage, options=None):
    """
    Deploy all content from one stage to the next.
    source_stage: 'Development' (0) or 'Test' (1); deployAll always targets the following stage.
    """
    body = {
        "sourceStageOrder": source_stage,
        "options": options or {
            "allowCreateArtifact": True,
            "allowOverwriteArtifact": True,
            "allowOverwriteTargetArtifactLabel": True
        }
    }
    response = requests.post(
        f"https://api.powerbi.com/v1.0/myorg/pipelines/{pipeline_id}/deployAll",
        headers=headers,
        json=body
    )
    return response.json()

# Deploy from Dev to Test
result = deploy_content(
    pipeline_id="your-pipeline-id",
    source_stage=0  # Development -> Test
)
print(f"Deployment started: {result['id']}")
Power BI Premium capacities provide the enterprise features needed for large-scale analytics deployments. Proper capacity management ensures consistent performance for all users.