6 min read
Synapse Pathway: Accelerating Data Warehouse Migration
Migrating from legacy data warehouses to Azure Synapse Analytics is a significant undertaking. Synapse Pathway is a free tool that automates code translation, making migrations faster and less error-prone.
What is Synapse Pathway?
Synapse Pathway translates SQL code from various sources to Azure Synapse Analytics SQL syntax:
- Teradata to Synapse
- Netezza to Synapse
- Snowflake to Synapse
- Amazon Redshift to Synapse
- SQL Server to Synapse
Getting Started
Installation
# Download Synapse Pathway
Invoke-WebRequest -Uri "https://aka.ms/synapse-pathway-download" -OutFile "SynapsePathway.msi"
# Install
Start-Process msiexec.exe -ArgumentList '/i SynapsePathway.msi /quiet' -Wait
# Verify installation
& "$env:ProgramFiles\Microsoft\Synapse Pathway\SynapsePathway.exe" --version
Basic Translation
# Translate a single file
SynapsePathway.exe `
--source Teradata `
--input "C:\Migration\input\script.sql" `
--output "C:\Migration\output"
# Translate entire directory
SynapsePathway.exe `
--source Teradata `
--input "C:\Migration\input" `
--output "C:\Migration\output" `
--recursive
Configuration
{
"translationConfig": {
"sourceSystem": "Teradata",
"targetSystem": "SynapseSQL",
"inputPath": "./input",
"outputPath": "./output",
"errorHandling": "ContinueOnError",
"options": {
"preserveComments": true,
"generateLineNumbers": true,
"inlineConversions": false,
"schemaMapping": {
"DBC": "dbo",
"PROD_DB": "production"
},
"dataTypeMapping": {
"BYTEINT": "TINYINT",
"NUMBER(38,0)": "BIGINT"
}
}
}
}
Teradata to Synapse Example
Original Teradata Code
-- Teradata source
CREATE MULTISET TABLE PROD_DB.CUSTOMER_SALES
, NO FALLBACK
, NO BEFORE JOURNAL
, NO AFTER JOURNAL
, CHECKSUM = DEFAULT
(
customer_id INTEGER NOT NULL,
customer_name VARCHAR(100) CHARACTER SET LATIN NOT CASESPECIFIC,
sale_date DATE FORMAT 'YYYY-MM-DD',
sale_amount DECIMAL(18,2),
region VARCHAR(50)
)
PRIMARY INDEX (customer_id)
PARTITION BY RANGE_N(sale_date BETWEEN DATE '2020-01-01' AND DATE '2025-12-31' EACH INTERVAL '1' MONTH);
-- Teradata specific function
SELECT
customer_id,
customer_name,
ZEROIFNULL(sale_amount) as sale_amount,
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY sale_date DESC) = 1
FROM PROD_DB.CUSTOMER_SALES
WHERE sale_date BETWEEN ADD_MONTHS(CURRENT_DATE, -12) AND CURRENT_DATE;
Translated Synapse Code
-- Synapse translated output
CREATE TABLE production.CUSTOMER_SALES
(
customer_id INT NOT NULL,
customer_name NVARCHAR(100),
sale_date DATE,
sale_amount DECIMAL(18,2),
region NVARCHAR(50)
)
WITH
(
DISTRIBUTION = HASH(customer_id),
CLUSTERED COLUMNSTORE INDEX,
PARTITION (sale_date RANGE RIGHT FOR VALUES
('2020-01-01', '2020-02-01', '2020-03-01', /* ... */ '2025-12-01'))
);
-- Translated query
SELECT
customer_id,
customer_name,
ISNULL(sale_amount, 0) as sale_amount
FROM (
SELECT
customer_id,
customer_name,
sale_amount,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY sale_date DESC) as rn
FROM production.CUSTOMER_SALES
WHERE sale_date BETWEEN DATEADD(month, -12, CURRENT_TIMESTAMP) AND CURRENT_TIMESTAMP
) t
WHERE rn = 1;
Netezza to Synapse
Original Netezza
-- Netezza source
CREATE TABLE SALES_FACT (
sale_id BIGINT NOT NULL,
product_id INTEGER,
customer_id INTEGER,
sale_date DATE,
quantity INTEGER,
unit_price NUMERIC(10,2),
total_amount NUMERIC(18,2)
)
DISTRIBUTE ON (sale_id);
-- Netezza specific syntax
SELECT
DECODE(status, 1, 'Active', 2, 'Inactive', 'Unknown') as status_desc,
NVL(discount, 0) as discount,
EXTRACT(MONTH FROM sale_date) as sale_month,
DATE_TRUNC('MONTH', sale_date) as month_start
FROM SALES_FACT
WHERE sale_date > CURRENT_DATE - 365;
Translated to Synapse
-- Synapse translated output
CREATE TABLE dbo.SALES_FACT (
sale_id BIGINT NOT NULL,
product_id INT,
customer_id INT,
sale_date DATE,
quantity INT,
unit_price DECIMAL(10,2),
total_amount DECIMAL(18,2)
)
WITH (
DISTRIBUTION = HASH(sale_id),
CLUSTERED COLUMNSTORE INDEX
);
-- Translated query
SELECT
CASE status WHEN 1 THEN 'Active' WHEN 2 THEN 'Inactive' ELSE 'Unknown' END as status_desc,
ISNULL(discount, 0) as discount,
MONTH(sale_date) as sale_month,
DATEADD(month, DATEDIFF(month, 0, sale_date), 0) as month_start
FROM dbo.SALES_FACT
WHERE sale_date > DATEADD(day, -365, GETDATE());
Handling Complex Transformations
Stored Procedures
-- Original Teradata procedure
REPLACE PROCEDURE PROD_DB.usp_UpdateSales(
IN p_start_date DATE,
IN p_end_date DATE
)
BEGIN
DECLARE v_count INTEGER;
DELETE FROM SALES_STAGING;
INSERT INTO SALES_STAGING
SELECT * FROM SALES_FACT
WHERE sale_date BETWEEN p_start_date AND p_end_date;
SELECT COUNT(*) INTO v_count FROM SALES_STAGING;
IF v_count > 0 THEN
MERGE INTO SALES_TARGET t
USING SALES_STAGING s
ON t.sale_id = s.sale_id
WHEN MATCHED THEN UPDATE SET
t.quantity = s.quantity,
t.total_amount = s.total_amount
WHEN NOT MATCHED THEN INSERT VALUES
(s.sale_id, s.product_id, s.customer_id, s.sale_date, s.quantity, s.unit_price, s.total_amount);
END IF;
END;
-- Synapse translated procedure
CREATE PROCEDURE production.usp_UpdateSales
@p_start_date DATE,
@p_end_date DATE
AS
BEGIN
SET NOCOUNT ON;
DECLARE @v_count INT;
TRUNCATE TABLE dbo.SALES_STAGING;
INSERT INTO dbo.SALES_STAGING
SELECT * FROM dbo.SALES_FACT
WHERE sale_date BETWEEN @p_start_date AND @p_end_date;
SELECT @v_count = COUNT(*) FROM dbo.SALES_STAGING;
IF @v_count > 0
BEGIN
-- Synapse doesn't support MERGE, use UPDATE + INSERT
UPDATE t
SET
t.quantity = s.quantity,
t.total_amount = s.total_amount
FROM dbo.SALES_TARGET t
INNER JOIN dbo.SALES_STAGING s ON t.sale_id = s.sale_id;
INSERT INTO dbo.SALES_TARGET
SELECT s.*
FROM dbo.SALES_STAGING s
LEFT JOIN dbo.SALES_TARGET t ON s.sale_id = t.sale_id
WHERE t.sale_id IS NULL;
END
END;
Assessment Report
Synapse Pathway generates detailed assessment reports:
{
"assessmentSummary": {
"totalObjects": 1250,
"translatedSuccessfully": 1180,
"partiallyTranslated": 45,
"requiresManualReview": 25,
"translationRate": "94.4%"
},
"objectBreakdown": {
"tables": {
"total": 500,
"translated": 498,
"partial": 2
},
"views": {
"total": 300,
"translated": 285,
"partial": 15
},
"procedures": {
"total": 200,
"translated": 175,
"partial": 18,
"manual": 7
},
"functions": {
"total": 150,
"translated": 135,
"partial": 10,
"manual": 5
},
"scripts": {
"total": 100,
"translated": 87,
"manual": 13
}
},
"issueCategories": [
{
"category": "Unsupported Function",
"count": 15,
"examples": ["HASHROW()", "STRTOK()"]
},
{
"category": "Different Behavior",
"count": 8,
"examples": ["NULL handling in aggregates"]
}
]
}
Migration Workflow
# migration-pipeline.yaml
stages:
- stage: Assessment
jobs:
- job: GenerateReport
steps:
- script: |
SynapsePathway.exe --assess \
--source Teradata \
--input $(inputPath) \
--report $(reportPath)
- stage: Translation
dependsOn: Assessment
condition: succeeded()
jobs:
- job: TranslateCode
steps:
- script: |
SynapsePathway.exe \
--source Teradata \
--input $(inputPath) \
--output $(outputPath) \
--config $(configPath)
- stage: Validation
dependsOn: Translation
jobs:
- job: SyntaxCheck
steps:
- script: |
foreach ($file in Get-ChildItem $(outputPath) -Filter *.sql) {
sqlcmd -S $(synapseEndpoint) -d $(database) -i $file.FullName -b
}
- stage: Deployment
dependsOn: Validation
condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/main'))
jobs:
- job: DeployToSynapse
steps:
- task: SqlAzureDacpacDeployment@1
inputs:
azureSubscription: $(subscription)
serverName: $(synapseEndpoint)
databaseName: $(database)
sqlFile: $(outputPath)/**/*.sql
Best Practices
- Start with assessment: Run assessment first to understand scope
- Handle exceptions: Plan for manual translation of complex objects
- Test thoroughly: Validate query results match source system
- Iterate: Run translation multiple times as you refine configurations
- Version control: Track both original and translated code
Conclusion
Synapse Pathway significantly reduces the effort required for data warehouse migrations. While it handles the bulk of translation automatically, plan for some manual work on complex transformations. The assessment reports help you understand the migration scope and plan resources accordingly.