Skip to content
Back to Blog
1 min read

Synapse Dedicated SQL Pools: Performance Tuning Guide

I wrote “Synapse Dedicated SQL Pools: Performance Tuning Guide” to share practical, production-minded guidance on this topic.

Understanding Distribution

Choose the right distribution strategy for your tables:

-- Hash distribution for large fact tables
CREATE TABLE FactSales
(
    SalesKey BIGINT NOT NULL,
    CustomerKey INT NOT NULL,
    ProductKey INT NOT NULL,
    OrderDate DATE NOT NULL,
    Amount DECIMAL(18,2),
    Quantity INT
)
WITH
(
    DISTRIBUTION = HASH(CustomerKey),
    CLUSTERED COLUMNSTORE INDEX
);

-- Replicate small dimension tables
CREATE TABLE DimProduct
(
    ProductKey INT NOT NULL,
    ProductName NVARCHAR(100),
    Category NVARCHAR(50),
    SubCategory NVARCHAR(50)
)
WITH
(
    DISTRIBUTION = REPLICATE,
    CLUSTERED COLUMNSTORE INDEX
);

-- Round-robin for staging tables
CREATE TABLE StgSales
(
    SalesKey BIGINT,
    CustomerKey INT,
    RawData NVARCHAR(MAX)
)
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    HEAP
);

Columnstore Index Optimization

-- Check columnstore health
SELECT
    t.name AS table_name,
    i.name AS index_name,
    rg.state_description,
    rg.total_rows,
    rg.deleted_rows,
    CAST(100.0 * rg.deleted_rows / rg.total_rows AS DECIMAL(5,2)) AS deleted_pct
FROM sys.column_store_row_groups rg
JOIN sys.tables t ON rg.object_id = t.object_id
JOIN sys.indexes i ON rg.object_id = i.object_id AND rg.index_id = i.index_id
WHERE rg.state_description = 'OPEN' OR rg.deleted_rows > 0
ORDER BY deleted_pct DESC;

-- Rebuild fragmented columnstore indexes
ALTER INDEX CCI_FactSales ON FactSales REBUILD
WITH (ONLINE = ON);

-- Reorganize to merge row groups
ALTER INDEX CCI_FactSales ON FactSales REORGANIZE
WITH (COMPRESS_ALL_ROW_GROUPS = ON);

Statistics Management

-- Create statistics on key columns
CREATE STATISTICS stats_customer ON FactSales(CustomerKey);
CREATE STATISTICS stats_product ON FactSales(ProductKey);
CREATE STATISTICS stats_date ON FactSales(OrderDate);

-- Multi-column statistics
CREATE STATISTICS stats_customer_product
ON FactSales(CustomerKey, ProductKey);

-- Update statistics
UPDATE STATISTICS FactSales;

-- Check statistics freshness
SELECT
    t.name AS table_name,
    s.name AS statistics_name,
    STATS_DATE(s.object_id, s.stats_id) AS last_updated,
    sp.rows AS row_count,
    sp.modification_counter AS modifications
FROM sys.stats s
JOIN sys.tables t ON s.object_id = t.object_id
CROSS APPLY sys.dm_db_stats_properties(s.object_id, s.stats_id) sp
ORDER BY sp.modification_counter DESC;

Workload Management

-- Create workload classifier for priority queries
CREATE WORKLOAD CLASSIFIER HighPriorityReports
WITH (
    WORKLOAD_GROUP = 'xlargerc',
    MEMBERNAME = 'ReportingUser',
    IMPORTANCE = HIGH
);

-- Create workload group for batch processing
CREATE WORKLOAD GROUP BatchProcessing
WITH (
    MIN_PERCENTAGE_RESOURCE = 0,
    MAX_PERCENTAGE_RESOURCE = 50,
    CAP_PERCENTAGE_RESOURCE = 50,
    REQUEST_MIN_RESOURCE_GRANT_PERCENT = 10,
    REQUEST_MAX_RESOURCE_GRANT_PERCENT = 25
);

CREATE WORKLOAD CLASSIFIER BatchJobs
WITH (
    WORKLOAD_GROUP = 'BatchProcessing',
    MEMBERNAME = 'ETLUser',
    IMPORTANCE = LOW
);

-- Monitor workload groups
SELECT
    wg.name AS workload_group,
    wc.name AS classifier_name,
    wc.importance,
    running_requests = r.active_count,
    queued_requests = r.queued_count
FROM sys.workload_management_workload_groups wg
LEFT JOIN sys.workload_management_workload_classifiers wc
    ON wg.group_id = wc.group_id
LEFT JOIN (
    SELECT
        group_id,
        COUNT(CASE WHEN status = 'Running' THEN 1 END) as active_count,
        COUNT(CASE WHEN status = 'Queued' THEN 1 END) as queued_count
    FROM sys.dm_pdw_exec_requests
    WHERE status IN ('Running', 'Queued')
    GROUP BY group_id
) r ON wg.group_id = r.group_id;

Materialized Views

-- Create materialized view for common aggregations
CREATE MATERIALIZED VIEW mv_DailySalesSummary
WITH (DISTRIBUTION = HASH(CustomerKey))
AS
SELECT
    CustomerKey,
    ProductKey,
    CAST(OrderDate AS DATE) AS SaleDate,
    SUM(Amount) AS TotalAmount,
    SUM(Quantity) AS TotalQuantity,
    COUNT(*) AS TransactionCount
FROM FactSales
GROUP BY
    CustomerKey,
    ProductKey,
    CAST(OrderDate AS DATE);

-- Check if materialized view is being used
EXPLAIN
SELECT
    CustomerKey,
    SUM(Amount) AS TotalAmount
FROM FactSales
GROUP BY CustomerKey;

Query Optimization

-- Use EXPLAIN to analyze query plans
EXPLAIN
SELECT
    c.CustomerName,
    SUM(f.Amount) as TotalSales
FROM FactSales f
JOIN DimCustomer c ON f.CustomerKey = c.CustomerKey
WHERE f.OrderDate >= '2022-01-01'
GROUP BY c.CustomerName
ORDER BY TotalSales DESC;

-- Avoid data movement with proper distribution
-- Good: Join on distribution column
SELECT f.*, c.CustomerName
FROM FactSales f
JOIN DimCustomer c ON f.CustomerKey = c.CustomerKey;

-- Monitor data movement
SELECT
    request_id,
    type,
    status,
    total_elapsed_time,
    data_processed
FROM sys.dm_pdw_dms_workers
WHERE request_id IN (
    SELECT request_id
    FROM sys.dm_pdw_exec_requests
    WHERE status = 'Running'
)
ORDER BY total_elapsed_time DESC;

Result Set Caching

-- Enable result set caching
ALTER DATABASE [YourDatabase]
SET RESULT_SET_CACHING ON;

-- Check cache hit ratio
SELECT
    result_cache_hit,
    COUNT(*) as query_count
FROM sys.dm_pdw_exec_requests
WHERE status = 'Completed'
    AND command LIKE 'SELECT%'
GROUP BY result_cache_hit;

These optimizations can dramatically improve dedicated SQL pool performance while reducing resource consumption.\n\n## Takeaways\n\nAdd a concise, personal takeaway and recommended next steps here.\n

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.