4 min read
Azure Data Factory Web Activity: Integrating REST APIs in Data Pipelines
The Web Activity in Azure Data Factory enables calling REST endpoints from within your data pipelines. It’s essential for integrating with external services, triggering workflows, and retrieving data from APIs.
Basic Web Activity
{
  "name": "CallRestApi",
  "type": "WebActivity",
  "typeProperties": {
    "method": "GET",
    "url": "https://api.service.com/v1/data",
    "authentication": {
      "resource": "https://api.service.com",
      "type": "MSI"
    },
    "headers": {
      "Accept": "application/json",
      "Content-Type": "application/json"
    }
  }
}
POST Request with Body
{
  "name": "PostToApi",
  "type": "WebActivity",
  "typeProperties": {
    "url": "https://api.service.com/v1/process",
    "method": "POST",
    "headers": {
      "Content-Type": "application/json"
    },
    "body": {
      "jobId": "@{pipeline().RunId}",
      "tableName": "@{pipeline().parameters.tableName}",
      "timestamp": "@{utcnow()}"
    }
  }
}
Authentication Methods
API Key Authentication
{
  "name": "ApiKeyAuth",
  "type": "WebActivity",
  "policy": {
    "secureInput": true
  },
  "typeProperties": {
    "url": "https://api.service.com/data",
    "method": "GET",
    "headers": {
      "X-API-Key": {
        "value": "@pipeline().parameters.apiKey",
        "type": "Expression"
      }
    }
  }
}
OAuth Bearer Token
{
  "name": "GetOAuthToken",
  "type": "WebActivity",
  "policy": {
    "secureInput": true,
    "secureOutput": true
  },
  "typeProperties": {
    "url": "https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token",
    "method": "POST",
    "headers": {
      "Content-Type": "application/x-www-form-urlencoded"
    },
    "body": "grant_type=client_credentials&client_id={client_id}&client_secret={secret}&scope=https://api.service.com/.default"
  }
}
{
  "name": "CallApiWithToken",
  "type": "WebActivity",
  "dependsOn": [
    {
      "activity": "GetOAuthToken",
      "dependencyConditions": ["Succeeded"]
    }
  ],
  "policy": {
    "secureInput": true
  },
  "typeProperties": {
    "url": "https://api.service.com/data",
    "method": "GET",
    "headers": {
      "Authorization": {
        "value": "@concat('Bearer ', activity('GetOAuthToken').output.access_token)",
        "type": "Expression"
      }
    }
  }
}
Managed Identity Authentication
{
  "name": "MsiAuthWebActivity",
  "type": "WebActivity",
  "typeProperties": {
    "method": "GET",
    "url": "https://management.azure.com/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.Sql/servers/{server}/databases?api-version=2021-02-01-preview",
    "authentication": {
      "resource": "https://management.azure.com",
      "type": "MSI"
    }
  }
}
Calling Azure Functions
{
  "name": "CallAzureFunction",
  "type": "WebActivity",
  "policy": {
    "secureInput": true
  },
  "typeProperties": {
    "url": {
      "value": "@concat('https://', pipeline().parameters.functionAppName, '.azurewebsites.net/api/ProcessData')",
      "type": "Expression"
    },
    "method": "POST",
    "headers": {
      "Content-Type": "application/json",
      "x-functions-key": {
        "value": "@pipeline().parameters.functionKey",
        "type": "Expression"
      }
    },
    "body": {
      "value": "@pipeline().parameters.functionInput",
      "type": "Expression"
    }
  }
}
Error Handling and Retry
{
  "name": "WebActivityWithRetry",
  "type": "WebActivity",
  "policy": {
    "retry": 3,
    "retryIntervalInSeconds": 30,
    "timeout": "0.00:10:00",
    "secureInput": false,
    "secureOutput": false
  },
  "typeProperties": {
    "method": "GET",
    "url": "https://api.service.com/data"
  }
}
Handling Web Activity Output
{
  "name": "ProcessApiResponse",
  "properties": {
    "activities": [
      {
        "name": "CallApi",
        "type": "WebActivity",
        "typeProperties": {
          "url": "https://api.service.com/orders",
          "method": "GET"
        }
      },
      {
        "name": "CheckResponse",
        "type": "IfCondition",
        "dependsOn": [
          {
            "activity": "CallApi",
            "dependencyConditions": ["Succeeded"]
          }
        ],
        "typeProperties": {
          "expression": {
            "value": "@greater(length(activity('CallApi').output.data), 0)",
            "type": "Expression"
          },
          "ifTrueActivities": [
            {
              "name": "ProcessData",
              "type": "ForEach",
              "typeProperties": {
                "items": {
                  "value": "@activity('CallApi').output.data",
                  "type": "Expression"
                },
                "activities": [
                  {
                    "name": "InsertRecord",
                    "type": "SqlServerStoredProcedure",
                    "linkedServiceName": {
                      "referenceName": "AzureSqlDatabaseLinkedService",
                      "type": "LinkedServiceReference"
                    },
                    "typeProperties": {
                      "storedProcedureName": "sp_InsertOrder",
                      "storedProcedureParameters": {
                        "OrderId": { "value": "@item().id" },
                        "CustomerName": { "value": "@item().customer.name" },
                        "Amount": { "value": "@item().totalAmount" }
                      }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    ]
  }
}
Paginated API Calls
{
  "name": "PaginatedApiPipeline",
  "properties": {
    "variables": {
      "hasMorePages": { "type": "Bool", "defaultValue": true },
      "pageNumber": { "type": "String", "defaultValue": "1" },
      "nextPageNumber": { "type": "String", "defaultValue": "1" },
      "allResults": { "type": "Array" }
    },
    "activities": [
      {
        "name": "FetchAllPages",
        "type": "Until",
        "typeProperties": {
          "expression": {
            "value": "@equals(variables('hasMorePages'), false)",
            "type": "Expression"
          },
          "timeout": "0.01:00:00",
          "activities": [
            {
              "name": "FetchPage",
              "type": "WebActivity",
              "typeProperties": {
                "url": {
                  "value": "@concat('https://api.service.com/data?page=', variables('pageNumber'), '&pageSize=100')",
                  "type": "Expression"
                },
                "method": "GET"
              }
            },
            {
              "name": "ProcessPage",
              "type": "Copy",
              "dependsOn": [
                {
                  "activity": "FetchPage",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "source": {
                  "type": "RestSource",
                  "additionalColumns": [
                    {
                      "name": "PageNumber",
                      "value": {
                        "value": "@string(variables('pageNumber'))",
                        "type": "Expression"
                      }
                    }
                  ]
                },
                "sink": { "type": "ParquetSink" }
              }
            },
            {
              "name": "CheckMorePages",
              "type": "SetVariable",
              "dependsOn": [
                {
                  "activity": "ProcessPage",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "variableName": "hasMorePages",
                "value": {
                  "value": "@activity('FetchPage').output.hasMorePages",
                  "type": "Expression"
                }
              }
            },
            {
              "name": "ComputeNextPage",
              "type": "SetVariable",
              "dependsOn": [
                {
                  "activity": "CheckMorePages",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "variableName": "nextPageNumber",
                "value": {
                  "value": "@string(add(int(variables('pageNumber')), 1))",
                  "type": "Expression"
                }
              }
            },
            {
              "name": "IncrementPage",
              "type": "SetVariable",
              "dependsOn": [
                {
                  "activity": "ComputeNextPage",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "variableName": "pageNumber",
                "value": {
                  "value": "@variables('nextPageNumber')",
                  "type": "Expression"
                }
              }
            }
          ]
        }
      }
    ]
  }
}
Webhook Integration
{
  "name": "WebhookNotification",
  "type": "WebActivity",
  "typeProperties": {
    "url": "https://hooks.slack.com/services/xxx/yyy/zzz",
    "method": "POST",
    "headers": {
      "Content-Type": "application/json"
    },
    "body": {
      "text": "Pipeline @{pipeline().Pipeline} completed with status: @{pipeline().parameters.status}",
      "channel": "#data-ops"
    }
  }
}
Secure Input/Output
{
  "name": "SecureWebActivity",
  "type": "WebActivity",
  "policy": {
    "secureOutput": true,
    "secureInput": true
  },
  "typeProperties": {
    "method": "POST",
    "url": "https://api.service.com/sensitive-data",
    "headers": {
      "Authorization": {
        "type": "Expression",
        "value": "@concat('Bearer ', pipeline().parameters.sensitiveToken)"
      }
    },
    "body": {
      "type": "Expression",
      "value": "@pipeline().parameters.sensitivePayload"
    }
  }
}
Best Practices
- Use Managed Identity: When possible, avoid storing credentials
- Set appropriate timeouts: Prevent hanging pipelines
- Handle pagination: For APIs returning large datasets
- Secure sensitive data: Use secureInput/secureOutput
- Implement retry logic: Handle transient failures
The Web Activity opens up endless integration possibilities, allowing Azure Data Factory to orchestrate workflows across diverse systems and services through REST APIs.