Back to Blog
3 min read

Azure Synapse Serverless SQL: Query Data Lake On-Demand

Synapse Serverless SQL queries data lake files directly—no loading, no infrastructure. Pay per TB scanned, perfect for ad-hoc analytics.

Query Parquet Files

-- Query parquet directly from storage
SELECT
    CustomerID,
    OrderDate,
    TotalAmount
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/data/orders/*.parquet',
    FORMAT = 'PARQUET'
) AS orders
WHERE OrderDate >= '2021-01-01'

Query CSV Files

SELECT *
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/data/sales.csv',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE,
    FIELDTERMINATOR = ',',
    ROWTERMINATOR = '\n'
) AS sales

Define Schema with WITH

SELECT *
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/data/customers.csv',
    FORMAT = 'CSV',
    PARSER_VERSION = '2.0',
    HEADER_ROW = TRUE
) WITH (
    CustomerID INT,
    FirstName VARCHAR(50),
    LastName VARCHAR(50),
    Email VARCHAR(100),
    CreatedDate DATE
) AS customers

Query JSON Files

SELECT
    JSON_VALUE(doc, '$.customerId') AS customerId,
    JSON_VALUE(doc, '$.order.total') AS total,
    JSON_QUERY(doc, '$.order.items') AS items
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/data/orders.json',
    FORMAT = 'CSV',
    FIELDTERMINATOR = '0x0b',
    FIELDQUOTE = '0x0b'
) WITH (doc NVARCHAR(MAX)) AS orders

Delta Lake Support

-- Query Delta tables directly
SELECT *
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/delta/sales',
    FORMAT = 'DELTA'
) AS delta_sales
WHERE SaleDate >= '2021-01-01'

Create External Tables

-- Create database
CREATE DATABASE analytics;

-- Create external data source
CREATE EXTERNAL DATA SOURCE DataLake
WITH (
    LOCATION = 'https://storage.blob.core.windows.net/data'
);

-- Create external file format
CREATE EXTERNAL FILE FORMAT ParquetFormat
WITH (FORMAT_TYPE = PARQUET);

-- Create external table
CREATE EXTERNAL TABLE Sales (
    SaleId INT,
    ProductId INT,
    Quantity INT,
    Amount DECIMAL(10,2),
    SaleDate DATE
)
WITH (
    LOCATION = 'sales/',
    DATA_SOURCE = DataLake,
    FILE_FORMAT = ParquetFormat
);

-- Query like regular table
SELECT * FROM Sales WHERE SaleDate >= '2021-01-01'

Partition Pruning

-- Leverage partition columns for efficiency
SELECT *
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/data/sales/year=*/month=*/*.parquet',
    FORMAT = 'PARQUET'
) AS sales
WHERE sales.filepath(1) = '2021'  -- year partition
  AND sales.filepath(2) = '01'    -- month partition

Create Views

CREATE VIEW vw_RecentOrders AS
SELECT
    OrderId,
    CustomerID,
    OrderDate,
    TotalAmount
FROM OPENROWSET(
    BULK 'https://storage.blob.core.windows.net/data/orders/*.parquet',
    FORMAT = 'PARQUET'
) AS orders
WHERE OrderDate >= DATEADD(day, -30, GETDATE())

CETAS (Create External Table As Select)

-- Transform and persist results
CREATE EXTERNAL TABLE ProcessedSales
WITH (
    LOCATION = 'processed/sales/',
    DATA_SOURCE = DataLake,
    FILE_FORMAT = ParquetFormat
)
AS
SELECT
    CustomerID,
    YEAR(OrderDate) AS Year,
    MONTH(OrderDate) AS Month,
    SUM(TotalAmount) AS TotalSales
FROM OPENROWSET(...) AS orders
GROUP BY CustomerID, YEAR(OrderDate), MONTH(OrderDate)

Cost Optimization

TipBenefit
Use ParquetColumnar = less data scanned
Partition dataPrune irrelevant files
Select specific columnsDon’t SELECT *
Use external tablesMetadata caching

Serverless SQL: query anything, pay for what you scan.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.