6 min read
T-SQL in Fabric: Patterns and Best Practices
T-SQL is the query language for Fabric Warehouse and SQL endpoints. Today we’ll explore patterns and best practices for writing effective T-SQL in Fabric.
T-SQL Surface Area in Fabric
-- Fabric supports most T-SQL features.
-- Supported (differences from on-prem SQL Server noted where relevant):
--   DDL:              CREATE, ALTER, DROP for tables, views, procedures
--   DML:              SELECT, INSERT, UPDATE, DELETE, MERGE
--   Aggregations:     all standard aggregate functions
--   Window functions: full support
--   CTEs:             Common Table Expressions
--   Temp tables:      #temp tables supported
--   Table variables:  @table variables supported
--
-- Not supported (as of July 2023):
--   IDENTITY columns
--   Triggers
--   User-defined functions (UDFs)
--   Cursors (use set-based operations instead)
--   OPENROWSET for external data
Data Transformation Patterns
Slowly Changing Dimension Type 2
-- SCD Type 2 pattern: expire the current row for customers whose tracked
-- attributes changed, insert a fresh current row for them, and add customers
-- never seen before. Tracked attributes: customer_name, email, segment.
-- NOTE: Fabric Warehouse has no IDENTITY, so surrogate keys are generated as
-- MAX(customer_key) + ROW_NUMBER(); this is only safe with a single writer.
CREATE PROCEDURE usp_merge_customer_scd2
AS
BEGIN
    DECLARE @today DATE = CAST(GETUTCDATE() AS DATE);

    -- Materialize the set of changed customers ONCE. The previous version
    -- re-derived "changed" rows in Step 2 by matching end_date = @today - 1,
    -- which also matched rows closed by an earlier day's run and could insert
    -- spurious new versions; a captured set keeps all steps consistent.
    CREATE TABLE #changed_customers (
        customer_id INT NOT NULL  -- assumes an integer business key — confirm against dim_customer
    );

    INSERT INTO #changed_customers (customer_id)
    SELECT s.customer_id
    FROM staging_customer s
    INNER JOIN dim_customer d
        ON s.customer_id = d.customer_id
    WHERE d.is_current = 1
      AND (
            -- ISNULL(..., '') treats NULL and empty string as equal; accepted
            -- trade-off inherited from the original comparison logic.
            ISNULL(s.customer_name, '') <> ISNULL(d.customer_name, '')
            OR ISNULL(s.email, '') <> ISNULL(d.email, '')
            OR ISNULL(s.segment, '') <> ISNULL(d.segment, '')
          );

    -- Step 1: close the current version of each changed customer.
    UPDATE dim_customer
    SET is_current = 0,
        end_date = DATEADD(day, -1, @today),
        modified_date = GETUTCDATE()
    WHERE is_current = 1
      AND customer_id IN (SELECT customer_id FROM #changed_customers);

    -- Step 2: insert the new current version for each changed customer,
    -- sourced directly from staging rather than from the just-closed dim rows.
    INSERT INTO dim_customer (
        customer_key, customer_id, customer_name, email, segment,
        start_date, end_date, is_current, created_date, modified_date
    )
    SELECT
        (SELECT ISNULL(MAX(customer_key), 0) FROM dim_customer)
            + ROW_NUMBER() OVER (ORDER BY s.customer_id),
        s.customer_id,
        s.customer_name,
        s.email,
        s.segment,
        @today,
        '9999-12-31',          -- open-ended sentinel end date for current rows
        1,
        GETUTCDATE(),
        GETUTCDATE()
    FROM staging_customer s
    WHERE s.customer_id IN (SELECT customer_id FROM #changed_customers);

    -- Step 3: insert brand-new customers (no dim row exists at all).
    -- MAX(customer_key) is re-read here, so keys continue after Step 2's.
    INSERT INTO dim_customer (
        customer_key, customer_id, customer_name, email, segment,
        start_date, end_date, is_current, created_date, modified_date
    )
    SELECT
        (SELECT ISNULL(MAX(customer_key), 0) FROM dim_customer)
            + ROW_NUMBER() OVER (ORDER BY s.customer_id),
        s.customer_id,
        s.customer_name,
        s.email,
        s.segment,
        @today,
        '9999-12-31',
        1,
        GETUTCDATE(),
        GETUTCDATE()
    FROM staging_customer s
    WHERE NOT EXISTS (
        SELECT 1 FROM dim_customer d WHERE d.customer_id = s.customer_id
    );

    DROP TABLE #changed_customers;
END;
MERGE Statement
-- MERGE for upsert operations: update changed products, insert new ones,
-- soft-delete products that disappeared from staging.
-- FIX: the original computed the surrogate key with a scalar
-- (SELECT MAX(product_key) + 1 ...) inside VALUES, which is evaluated once
-- per statement and therefore assigns the SAME key to every row inserted by
-- a single MERGE. Keys are now pre-computed per source row via ROW_NUMBER().
MERGE INTO dim_product AS target
USING (
    SELECT
        sp.product_id,
        sp.product_name,
        sp.category,
        sp.unit_price,
        (SELECT ISNULL(MAX(product_key), 0) FROM dim_product)
            + ROW_NUMBER() OVER (ORDER BY sp.product_id) AS new_product_key
    FROM staging_product sp
) AS source
ON target.product_id = source.product_id
WHEN MATCHED AND (
    -- NOTE(review): plain <> treats a NULL-vs-value difference as "no change";
    -- wrap both sides with ISNULL if these columns are nullable and such
    -- flips must be detected — confirm column nullability first.
    target.product_name <> source.product_name OR
    target.category <> source.category OR
    target.unit_price <> source.unit_price
) THEN
    UPDATE SET
        product_name = source.product_name,
        category = source.category,
        unit_price = source.unit_price,
        modified_date = GETUTCDATE()
WHEN NOT MATCHED BY TARGET THEN
    INSERT (product_key, product_id, product_name, category, unit_price, created_date)
    VALUES (
        source.new_product_key,
        source.product_id,
        source.product_name,
        source.category,
        source.unit_price,
        GETUTCDATE()
    )
WHEN NOT MATCHED BY SOURCE THEN
    -- Soft-delete: product no longer present in staging.
    UPDATE SET is_active = 0, modified_date = GETUTCDATE();
Analytical Query Patterns
Rolling Aggregations
-- Rolling 7-day average and sum of daily sales.
-- First collapse fact_sales to one row per day, then apply a window frame of
-- the current row plus the six preceding days.
WITH daily_totals AS (
    SELECT
        date_key,
        SUM(total_amount) AS daily_sales
    FROM fact_sales
    GROUP BY date_key
)
SELECT
    date_key,
    daily_sales,
    AVG(daily_sales) OVER (
        ORDER BY date_key
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS rolling_7day_avg,
    SUM(daily_sales) OVER (
        ORDER BY date_key
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS rolling_7day_sum
FROM daily_totals
ORDER BY date_key;
Year-over-Year Comparison
-- Year-over-year comparison of monthly revenue: 2023 vs 2022.
-- LEFT JOIN keeps 2023 months with no 2022 revenue (prior-year columns NULL).
WITH sales_2023 AS (
    -- Monthly revenue for the current year.
    SELECT
        d.month_num,
        d.month_name,
        SUM(s.total_amount) AS revenue
    FROM fact_sales s
    JOIN dim_date d ON s.date_key = d.date_key
    WHERE d.year_num = 2023
    GROUP BY d.month_num, d.month_name
),
sales_2022 AS (
    -- Monthly revenue for the prior year.
    SELECT
        d.month_num,
        SUM(s.total_amount) AS revenue
    FROM fact_sales s
    JOIN dim_date d ON s.date_key = d.date_key
    WHERE d.year_num = 2022
    GROUP BY d.month_num
)
SELECT
    cur.month_name,
    cur.revenue AS revenue_2023,
    prev.revenue AS revenue_2022,
    cur.revenue - prev.revenue AS yoy_change,
    -- NULLIF maps a zero prior-year denominator to NULL, exactly matching the
    -- original CASE guard against divide-by-zero.
    (cur.revenue - prev.revenue) / NULLIF(prev.revenue, 0) * 100 AS yoy_percent_change
FROM sales_2023 cur
LEFT JOIN sales_2022 prev ON cur.month_num = prev.month_num
ORDER BY cur.month_num;
Cohort Analysis
-- Customer cohort analysis: bucket customers by first-purchase MONTH and
-- track customer counts and revenue by months elapsed since first purchase.
-- FIX: the original grouped the final result by the daily cohort_date_key
-- while labeling the column cohort_month, producing several rows per
-- (month, months_since_first) bucket; grouping now happens at month grain.
WITH first_purchase AS (
    -- Earliest purchase per customer; date_key is an int yyyymmdd surrogate
    -- (assumed from the CAST-to-DATE usage below — confirm against dim_date).
    SELECT
        customer_key,
        MIN(date_key) AS cohort_date_key
    FROM fact_sales
    GROUP BY customer_key
),
cohort_data AS (
    SELECT
        -- Truncate yyyymmdd to yyyymm so cohorts are monthly buckets.
        LEFT(CAST(fp.cohort_date_key AS VARCHAR), 6) AS cohort_month,
        s.customer_key,
        s.total_amount,
        -- Whole calendar months between first purchase and this sale.
        DATEDIFF(month,
            CAST(CAST(fp.cohort_date_key AS VARCHAR) AS DATE),
            CAST(CAST(s.date_key AS VARCHAR) AS DATE)
        ) AS months_since_first
    FROM fact_sales s
    JOIN first_purchase fp ON s.customer_key = fp.customer_key
)
SELECT
    cohort_month,
    months_since_first,
    COUNT(DISTINCT customer_key) AS customers,
    SUM(total_amount) AS revenue
FROM cohort_data
GROUP BY cohort_month, months_since_first
ORDER BY cohort_month, months_since_first;
Data Quality Patterns
Validation Queries
-- Data quality checks for fact_sales. Each check appends one summary row to
-- @errors; failing checks are returned as a result set and a severity-16
-- error is raised so a calling pipeline can halt.
CREATE PROCEDURE usp_validate_fact_sales
AS
BEGIN
    DECLARE @errors TABLE (
        check_name NVARCHAR(100),
        error_count INT,
        sample_values NVARCHAR(MAX)   -- comma-separated offending keys, where cheap to collect
    );

    -- Check 1: fact rows with no customer key.
    -- Explicit column lists on every INSERT so a later column reorder in the
    -- table variable cannot silently misalign values.
    -- CAST to NVARCHAR(MAX) so STRING_AGG returns a MAX type instead of
    -- failing with truncation once the aggregated list exceeds 8000 bytes.
    INSERT INTO @errors (check_name, error_count, sample_values)
    SELECT
        'Null customer_key',
        COUNT(*),
        STRING_AGG(CAST(sales_key AS NVARCHAR(MAX)), ',')
    FROM fact_sales
    WHERE customer_key IS NULL;

    -- Check 2: fact rows whose customer_key has no dim_customer row.
    INSERT INTO @errors (check_name, error_count, sample_values)
    SELECT
        'Orphan customer records',
        COUNT(*),
        NULL
    FROM fact_sales s
    WHERE NOT EXISTS (
        SELECT 1 FROM dim_customer c WHERE c.customer_key = s.customer_key
    );

    -- Check 3: negative sale amounts.
    INSERT INTO @errors (check_name, error_count, sample_values)
    SELECT
        'Negative total_amount',
        COUNT(*),
        STRING_AGG(CAST(sales_key AS NVARCHAR(MAX)), ',')
    FROM fact_sales
    WHERE total_amount < 0;

    -- Check 4: date_key in the future.
    -- CONVERT(..., 112) yields yyyymmdd without the CLR-backed FORMAT(),
    -- which is substantially slower per row.
    INSERT INTO @errors (check_name, error_count, sample_values)
    SELECT
        'Future date_key',
        COUNT(*),
        NULL
    FROM fact_sales
    WHERE date_key > CONVERT(INT, CONVERT(CHAR(8), GETDATE(), 112));

    -- Surface only the checks that found problems.
    SELECT * FROM @errors WHERE error_count > 0;

    -- Fail loudly if any check found problems.
    IF EXISTS (SELECT 1 FROM @errors WHERE error_count > 0)
    BEGIN
        RAISERROR('Data quality issues detected', 16, 1);
    END
END;
Deduplication
-- Deduplicate fact_sales: within each (order_id, customer_key, product_key,
-- date_key) grain, keep only the newest row by created_at and delete the rest.
WITH ranked_rows AS (
    SELECT
        sales_key,
        ROW_NUMBER() OVER (
            PARTITION BY order_id, customer_key, product_key, date_key
            ORDER BY created_at DESC
        ) AS row_rank
    FROM fact_sales
)
DELETE FROM fact_sales
WHERE EXISTS (
    SELECT 1
    FROM ranked_rows r
    WHERE r.sales_key = fact_sales.sales_key
      AND r.row_rank > 1
);
Performance Tips
-- Performance tips for Fabric Warehouse T-SQL.
-- (SELECT * is used below for brevity only; list needed columns in production.)
-- 1. Use appropriate predicates
-- Good: Filter on partitioned/indexed columns
SELECT * FROM fact_sales WHERE date_key BETWEEN 20230101 AND 20230131;
-- 2. Avoid functions on columns in WHERE
-- Bad: wrapping the column in YEAR() makes the predicate non-SARGable,
-- so the engine cannot exploit ordering or statistics on created_at:
SELECT * FROM fact_sales WHERE YEAR(created_at) = 2023;
-- Good: a half-open range covers the same year and stays SARGable:
SELECT * FROM fact_sales
WHERE created_at >= '2023-01-01' AND created_at < '2024-01-01';
-- 3. Use EXISTS instead of IN for subqueries
-- Better performance for large datasets; EXISTS can stop at the first match
-- and the negated form avoids the NOT IN + NULL pitfall.
SELECT * FROM dim_customer c
WHERE EXISTS (
SELECT 1 FROM fact_sales s WHERE s.customer_key = c.customer_key
);
-- 4. Limit result sets during development
SELECT TOP 100 * FROM large_table;
-- 5. Inspect query plans. NOTE(review): EXPLAIN is not a SQL Server T-SQL
-- keyword; use SET SHOWPLAN_XML ON or the estimated-plan view instead —
-- confirm which of these your Fabric endpoint supports.
-- Check the Query Insights feature in Fabric portal
Tomorrow we’ll explore cross-database queries in Fabric.