Azure Synapse Link for Dataverse: Real-Time Analytics on Business Data
Azure Synapse Link for Dataverse enables real-time analytics on your Dynamics 365 and Power Platform data without complex ETL pipelines. Announced at Ignite 2021, this integration brings operational and analytical workloads together.
What is Synapse Link for Dataverse?
Synapse Link continuously replicates Dataverse data to Azure Synapse Analytics:
- Near real-time sync: Changes appear in Synapse within minutes
- No ETL pipelines: Automatic, managed replication
- Schema evolution: Columns added in Dataverse appear automatically
- Historical data: Track changes over time with append-only mode
Setting Up Synapse Link
Prerequisites
- Dataverse environment with system administrator access
- Azure Synapse workspace
- Azure Data Lake Storage Gen2 account
Enable the Link
Configure the link in the Power Platform admin center, or script the setup with PowerShell:
# Configure Azure Synapse Link from PowerShell.
Install-Module -Name Microsoft.PowerApps.Administration.PowerShell
Add-PowerAppsAccount

# Look up the target Dataverse environment.
# ($environment avoids confusion with the built-in env: provider.)
$environment = Get-AdminPowerAppEnvironment -EnvironmentName "Production"

# Enable Azure Synapse Link.
# (This is typically done through the UI, but showing the concept.)
$synapseConfig = @{
    EnvironmentId       = $environment.EnvironmentName
    SynapseWorkspaceUrl = "https://your-synapse.dev.azuresynapse.net"
    StorageAccountUrl   = "https://yourstorage.dfs.core.windows.net"
    ContainerName       = "dataverse"
}
Select Tables to Sync
Choose which Dataverse tables to replicate:
{
"linkedService": {
"tables": [
{
"name": "account",
"columns": ["accountid", "name", "revenue", "createdon", "modifiedon"]
},
{
"name": "contact",
"columns": ["contactid", "fullname", "emailaddress1", "parentcustomerid"]
},
{
"name": "opportunity",
"columns": ["opportunityid", "name", "estimatedvalue", "closeprobability", "actualclosedate"]
},
{
"name": "salesorder",
"columns": ["salesorderid", "name", "totalamount", "customerid", "createdon"]
}
]
}
}
Querying Dataverse Data in Synapse
Once linked, query your business data with SQL:
-- Register the Data Lake container that Synapse Link writes to as an
-- external data source for serverless SQL queries.
CREATE EXTERNAL DATA SOURCE DataverseLink
WITH (
    LOCATION = 'https://yourstorage.dfs.core.windows.net/dataverse'
);

-- High-revenue accounts with a calendar created date, largest first.
SELECT
    acct.accountid,
    acct.name,
    acct.revenue,
    CAST(acct.createdon AS DATE) AS created_date
FROM OPENROWSET(
    BULK 'account/*.csv',
    DATA_SOURCE = 'DataverseLink',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE
) AS acct
WHERE acct.revenue > 100000
ORDER BY acct.revenue DESC;
Building Analytics on Business Data
Sales Pipeline Analysis
-- Open-pipeline summary per sales stage: deal count, total estimated
-- value, and average win probability, largest stages first.
SELECT
    opp.stepname AS stage,
    COUNT(*) AS opportunity_count,
    SUM(opp.estimatedvalue) AS total_value,
    AVG(opp.closeprobability) AS avg_probability
FROM OPENROWSET(
    BULK 'opportunity/*.csv',
    DATA_SOURCE = 'DataverseLink',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE
) AS opp
WHERE opp.statecode = 0  -- 0 = Open opportunities
GROUP BY opp.stepname
ORDER BY total_value DESC;
Customer 360 View
-- Join accounts with opportunities and orders.
-- Each child table is pre-aggregated BEFORE joining: joining both
-- opportunities and salesorders directly onto account fans rows out
-- (opportunities x orders per account) and inflates every SUM();
-- the DISTINCT counts in the original only masked that bug for the
-- count columns while pipeline_value and lifetime_value stayed wrong.
WITH OpportunityAgg AS (
    -- Open-pipeline rollup per parent account
    SELECT
        o.parentaccountid,
        COUNT(DISTINCT o.opportunityid) AS open_opportunities,
        SUM(o.estimatedvalue) AS pipeline_value
    FROM OPENROWSET(
        BULK 'opportunity/*.csv',
        DATA_SOURCE = 'DataverseLink',
        FORMAT = 'CSV',
        PARSER_VERSION = '2.0',
        HEADER_ROW = TRUE
    ) AS o
    WHERE o.statecode = 0  -- Open opportunities only
    GROUP BY o.parentaccountid
),
OrderAgg AS (
    -- Lifetime order rollup per customer
    SELECT
        so.customerid,
        COUNT(DISTINCT so.salesorderid) AS total_orders,
        SUM(so.totalamount) AS lifetime_value
    FROM OPENROWSET(
        BULK 'salesorder/*.csv',
        DATA_SOURCE = 'DataverseLink',
        FORMAT = 'CSV',
        PARSER_VERSION = '2.0',
        HEADER_ROW = TRUE
    ) AS so
    GROUP BY so.customerid
),
CustomerMetrics AS (
    -- One row per account; COALESCE keeps counts at 0 (not NULL) for
    -- accounts with no matching child rows, matching COUNT() semantics.
    SELECT
        a.accountid,
        a.name AS account_name,
        a.revenue AS annual_revenue,
        COALESCE(o.open_opportunities, 0) AS open_opportunities,
        o.pipeline_value,
        COALESCE(so.total_orders, 0) AS total_orders,
        so.lifetime_value
    FROM OPENROWSET(
        BULK 'account/*.csv',
        DATA_SOURCE = 'DataverseLink',
        FORMAT = 'CSV',
        PARSER_VERSION = '2.0',
        HEADER_ROW = TRUE
    ) AS a
    LEFT JOIN OpportunityAgg AS o
        ON a.accountid = o.parentaccountid
    LEFT JOIN OrderAgg AS so
        ON a.accountid = so.customerid
)
SELECT
    account_name,
    annual_revenue,
    open_opportunities,
    pipeline_value,
    total_orders,
    lifetime_value,
    CASE
        WHEN lifetime_value > 100000 THEN 'Enterprise'
        WHEN lifetime_value > 25000 THEN 'Mid-Market'
        ELSE 'SMB'
    END AS customer_tier
FROM CustomerMetrics
ORDER BY lifetime_value DESC;
Spark Analytics
Use Spark pools for advanced analytics:
# PySpark notebook in Synapse
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import RandomForestRegressor

# Read the CSV folders Synapse Link maintains for each Dataverse table.
accounts = spark.read.format("csv") \
    .option("header", "true") \
    .option("inferSchema", "true") \
    .load("abfss://dataverse@yourstorage.dfs.core.windows.net/account/")

opportunities = spark.read.format("csv") \
    .option("header", "true") \
    .option("inferSchema", "true") \
    .load("abfss://dataverse@yourstorage.dfs.core.windows.net/opportunity/")

# Training set: WON opportunities only.
# Dataverse opportunity statecode: 0 = Open, 1 = Won, 2 = Lost (same
# mapping the SQL dashboard view uses); the original filtered on 2,
# which would have trained the model on LOST deals. Parenthesized
# chaining also fixes the original's syntax error of placing a comment
# after a line-continuation backslash.
opp_features = (
    opportunities
    .filter(col("statecode") == 1)  # Won opportunities
    .select(
        "opportunityid",
        "estimatedvalue",
        "closeprobability",
        "actualvalue",
        datediff(col("actualclosedate"), col("createdon")).alias("days_to_close"),
    )
    .dropna()
)

# Assemble the feature vector used to predict final opportunity value.
assembler = VectorAssembler(
    inputCols=["closeprobability", "days_to_close"],
    outputCol="features",
)
training_data = assembler.transform(opp_features)

rf = RandomForestRegressor(
    featuresCol="features",
    labelCol="actualvalue",
    numTrees=100,
)
model = rf.fit(training_data)

# Score open opportunities. The age column MUST be named "days_to_close"
# so the fitted assembler (inputCols above) can find it; the original's
# "days_open" would make assembler.transform() fail at scoring time.
open_opps = (
    opportunities
    .filter(col("statecode") == 0)
    .withColumn("days_to_close", datediff(current_date(), col("createdon")))
)

# Apply the model to the open pipeline.
predictions = model.transform(assembler.transform(open_opps))

# Persist scored opportunities for downstream reporting.
predictions.select(
    "opportunityid",
    "estimatedvalue",
    col("prediction").alias("predicted_value"),
).write \
    .format("delta") \
    .mode("overwrite") \
    .save("abfss://analytics@yourstorage.dfs.core.windows.net/opportunity_predictions/")
Power BI Integration
Connect Power BI to Synapse for near real-time dashboards:
-- Flattened opportunity-plus-account feed consumed by the Power BI
-- dashboard (one row per opportunity with a matching account).
CREATE VIEW dbo.vw_SalesDashboard AS
SELECT
    CAST(opp.createdon AS DATE) AS order_date,
    acct.name AS account_name,
    acct.industrycode AS industry,
    acct.address1_country AS country,
    opp.name AS opportunity_name,
    opp.estimatedvalue,
    opp.actualvalue,
    opp.stepname AS sales_stage,
    -- Dataverse opportunity state: 0 = Open, 1 = Won, 2 = Lost
    CASE opp.statecode
        WHEN 0 THEN 'Open'
        WHEN 1 THEN 'Won'
        WHEN 2 THEN 'Lost'
    END AS status
FROM OPENROWSET(
    BULK 'opportunity/*.csv',
    DATA_SOURCE = 'DataverseLink',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE
) AS opp
INNER JOIN OPENROWSET(
    BULK 'account/*.csv',
    DATA_SOURCE = 'DataverseLink',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE
) AS acct
    ON opp.parentaccountid = acct.accountid;
DAX measures for the dashboard:
// Win rate: won deals over all closed deals; third argument returns 0
// instead of an error when there are no closed deals.
Win Rate =
DIVIDE(
    CALCULATE(COUNTROWS(Sales), Sales[status] = "Won"),
    CALCULATE(COUNTROWS(Sales), Sales[status] IN {"Won", "Lost"}),
    0
)

// Open-pipeline value over the trailing 30 days of the current filter
// context.
Pipeline Trend =
CALCULATE(
    SUM(Sales[estimatedvalue]),
    Sales[status] = "Open",
    DATESINPERIOD(
        'Date'[Date],
        MAX('Date'[Date]),
        -30,
        DAY
    )
)

// Forecast accuracy: 1 - |actual - estimate| / actual, averaged over won
// deals. DIVIDE guards against won rows with actualvalue = 0, where the
// original raw "/" operator would raise a division-by-zero error
// (consistent with the DIVIDE usage in Win Rate above).
Forecast Accuracy =
AVERAGEX(
    FILTER(Sales, Sales[status] = "Won"),
    1 - DIVIDE(ABS(Sales[actualvalue] - Sales[estimatedvalue]), Sales[actualvalue])
)
Change Data Capture
Track historical changes with append-only mode:
-- Full change trail for a single account. In append-only mode every
-- version of the row is retained; SinkModifiedOn and IsDeleted are
-- metadata columns written by Synapse Link.
SELECT
    acct.accountid,
    acct.name,
    acct.revenue,
    acct.SinkModifiedOn AS change_timestamp,
    acct.IsDeleted
FROM OPENROWSET(
    BULK 'account/*.csv',
    DATA_SOURCE = 'DataverseLink',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE
) AS acct
WHERE acct.accountid = 'specific-account-id'
ORDER BY acct.SinkModifiedOn DESC;
-- Revenue deltas between successive versions of each account.
WITH RevenueHistory AS (
    -- Pair every version of an account with the revenue recorded in
    -- the immediately preceding version.
    SELECT
        accountid,
        name,
        revenue,
        SinkModifiedOn,
        LAG(revenue) OVER (
            PARTITION BY accountid
            ORDER BY SinkModifiedOn
        ) AS previous_revenue
    FROM OPENROWSET(
        BULK 'account/*.csv',
        DATA_SOURCE = 'DataverseLink',
        FORMAT = 'CSV',
        PARSER_VERSION = '2.0',
        HEADER_ROW = TRUE
    ) AS accounts
)
SELECT
    accountid,
    name,
    previous_revenue,
    revenue AS current_revenue,
    revenue - previous_revenue AS change,
    SinkModifiedOn AS change_date
FROM RevenueHistory
-- NOTE: transitions to/from a NULL revenue are excluded by these
-- predicates (NULL comparisons are never true).
WHERE previous_revenue IS NOT NULL
  AND revenue <> previous_revenue
ORDER BY SinkModifiedOn DESC;
Monitoring and Troubleshooting
# Monitor sync status
import requests
from datetime import datetime, timedelta
def check_sync_health(environment_id, linked_service_name, query_results=None):
    """Check Synapse Link synchronization health.

    Args:
        environment_id: Dataverse environment id used to filter the logs.
        linked_service_name: Name of the Synapse Link linked service.
        query_results: Optional mapping with keys 'Errors', 'LastSync'
            and 'RowsSynced' -- the summarized row produced by running
            the KQL below against Log Analytics. The original sketch
            referenced undefined locals (errors, last_sync, rows_synced)
            and always raised NameError; this parameter makes the data
            dependency explicit while keeping the original call
            signature working.

    Returns:
        dict with 'status' ('Healthy'/'Warning', or 'Unknown' when no
        query results are supplied), 'last_sync', 'rows_synced',
        'errors'.
    """
    # KQL to run against the Log Analytics workspace receiving
    # DataverseSyncLogs; values are interpolated, so only pass trusted ids.
    query = f"""
    DataverseSyncLogs
    | where EnvironmentId == '{environment_id}'
    | where LinkedServiceName == '{linked_service_name}'
    | where TimeGenerated > ago(1h)
    | summarize
        LastSync = max(TimeGenerated),
        RowsSynced = sum(RowsProcessed),
        Errors = countif(Status == 'Failed')
    """
    if query_results is None:
        # No execution backend wired up here; report Unknown instead of
        # fabricating health data (or crashing, as the original did).
        return {
            "status": "Unknown",
            "last_sync": None,
            "rows_synced": 0,
            "errors": 0,
        }
    errors = query_results.get("Errors", 0)
    return {
        "status": "Healthy" if errors == 0 else "Warning",
        "last_sync": query_results.get("LastSync"),
        "rows_synced": query_results.get("RowsSynced", 0),
        "errors": errors,
    }
# Alert on sync delays: fire whenever any failed sync appears in the
# last 30 minutes.
def create_sync_alert():
    """Build the Azure Monitor alert-rule payload for failed syncs."""
    failure_count_query = """
        DataverseSyncLogs
        | where TimeGenerated > ago(30m)
        | where Status == 'Failed'
        | count
        """
    # A count greater than zero means at least one failed sync.
    failed_sync_condition = {
        "query": failure_count_query,
        "threshold": 0,
        "operator": "GreaterThan",
    }
    return {
        "condition": {"allOf": [failed_sync_condition]},
        "actions": {"actionGroups": ["dataverse-alerts"]},
    }
Azure Synapse Link for Dataverse eliminates the traditional barriers between operational and analytical systems. Business users get insights from live data while IT avoids the complexity of building and maintaining ETL pipelines.