Back to Blog
4 min read

Azure Data Factory Web Activity: Integrating REST APIs in Data Pipelines

The Web Activity in Azure Data Factory enables calling REST endpoints from within your data pipelines. It’s essential for integrating with external services, triggering workflows, and retrieving data from APIs.

Basic Web Activity

{
  "name": "CallRestApi",
  "type": "WebActivity",
  "typeProperties": {
    "method": "GET",
    "url": "https://api.service.com/v1/data",
    "authentication": {
      "type": "MSI",
      "resource": "https://api.service.com"
    },
    "headers": {
      "Accept": "application/json",
      "Content-Type": "application/json"
    }
  }
}

POST Request with Body

{
  "name": "PostToApi",
  "type": "WebActivity",
  "typeProperties": {
    "method": "POST",
    "url": "https://api.service.com/v1/process",
    "headers": {
      "Content-Type": "application/json"
    },
    "body": {
      "type": "Expression",
      "value": "@json(concat('{\"jobId\": \"', pipeline().RunId, '\", \"tableName\": \"', pipeline().parameters.tableName, '\", \"timestamp\": \"', utcnow(), '\"}'))"
    }
  }
}

Authentication Methods

API Key Authentication

{
  "name": "ApiKeyAuth",
  "type": "WebActivity",
  "typeProperties": {
    "method": "GET",
    "url": "https://api.service.com/data",
    "headers": {
      "X-API-Key": {
        "type": "Expression",
        "value": "@pipeline().parameters.apiKey"
      }
    }
  }
}

OAuth Bearer Token

{
  "name": "GetOAuthToken",
  "type": "WebActivity",
  "policy": {
    "secureInput": true,
    "secureOutput": true
  },
  "typeProperties": {
    "url": "https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token",
    "method": "POST",
    "headers": {
      "Content-Type": "application/x-www-form-urlencoded"
    },
    "body": "grant_type=client_credentials&client_id={client_id}&client_secret={secret}&scope=https://api.service.com/.default"
  }
}
{
  "name": "CallApiWithToken",
  "type": "WebActivity",
  "dependsOn": [
    {
      "activity": "GetOAuthToken",
      "dependencyConditions": ["Succeeded"]
    }
  ],
  "policy": {
    "secureInput": true
  },
  "typeProperties": {
    "url": "https://api.service.com/data",
    "method": "GET",
    "headers": {
      "Authorization": {
        "value": "@concat('Bearer ', activity('GetOAuthToken').output.access_token)",
        "type": "Expression"
      }
    }
  }
}

Managed Identity Authentication

{
  "name": "MsiAuthWebActivity",
  "type": "WebActivity",
  "typeProperties": {
    "method": "GET",
    "url": "https://management.azure.com/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.Sql/servers/{server}/databases?api-version=2021-02-01-preview",
    "authentication": {
      "resource": "https://management.azure.com",
      "type": "MSI"
    }
  }
}

Calling Azure Functions

{
  "name": "CallAzureFunction",
  "type": "WebActivity",
  "linkedServiceName": {
    "type": "LinkedServiceReference",
    "referenceName": "AzureFunctionLinkedService"
  },
  "typeProperties": {
    "method": "POST",
    "url": {
      "type": "Expression",
      "value": "@concat('https://', pipeline().parameters.functionAppName, '.azurewebsites.net/api/ProcessData')"
    },
    "headers": {
      "Content-Type": "application/json",
      "x-functions-key": {
        "type": "Expression",
        "value": "@pipeline().parameters.functionKey"
      }
    },
    "body": {
      "type": "Expression",
      "value": "@pipeline().parameters.functionInput"
    }
  }
}

Error Handling and Retry

{
  "name": "WebActivityWithRetry",
  "type": "WebActivity",
  "typeProperties": {
    "method": "GET",
    "url": "https://api.service.com/data"
  },
  "policy": {
    "retry": 3,
    "retryIntervalInSeconds": 30,
    "timeout": "0.00:10:00",
    "secureInput": false,
    "secureOutput": false
  }
}

Handling Web Activity Output

{
  "name": "ProcessApiResponse",
  "properties": {
    "activities": [
      {
        "name": "CallApi",
        "type": "WebActivity",
        "typeProperties": {
          "method": "GET",
          "url": "https://api.service.com/orders"
        }
      },
      {
        "name": "CheckResponse",
        "type": "IfCondition",
        "dependsOn": [
          {
            "activity": "CallApi",
            "dependencyConditions": [
              "Succeeded"
            ]
          }
        ],
        "typeProperties": {
          "expression": {
            "type": "Expression",
            "value": "@greater(length(activity('CallApi').output.data), 0)"
          },
          "ifTrueActivities": [
            {
              "name": "ProcessData",
              "type": "ForEach",
              "typeProperties": {
                "items": {
                  "type": "Expression",
                  "value": "@activity('CallApi').output.data"
                },
                "activities": [
                  {
                    "name": "InsertRecord",
                    "type": "SqlServerStoredProcedure",
                    "typeProperties": {
                      "storedProcedureName": "sp_InsertOrder",
                      "storedProcedureParameters": {
                        "OrderId": {
                          "value": "@item().id"
                        },
                        "CustomerName": {
                          "value": "@item().customer.name"
                        },
                        "Amount": {
                          "value": "@item().totalAmount"
                        }
                      }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    ]
  }
}

Paginated API Calls

{
  "name": "PaginatedApiPipeline",
  "properties": {
    "variables": {
      "hasMorePages": { "type": "Bool", "defaultValue": true },
      "pageNumber": { "type": "String", "defaultValue": "1" },
      "nextPageNumber": { "type": "String" },
      "allResults": { "type": "Array" }
    },
    "activities": [
      {
        "name": "FetchAllPages",
        "type": "Until",
        "typeProperties": {
          "expression": {
            "value": "@equals(variables('hasMorePages'), false)",
            "type": "Expression"
          },
          "timeout": "0.01:00:00",
          "activities": [
            {
              "name": "FetchPage",
              "type": "WebActivity",
              "typeProperties": {
                "url": {
                  "value": "@concat('https://api.service.com/data?page=', variables('pageNumber'), '&pageSize=100')",
                  "type": "Expression"
                },
                "method": "GET"
              }
            },
            {
              "name": "ProcessPage",
              "type": "Copy",
              "dependsOn": [
                {
                  "activity": "FetchPage",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "source": {
                  "type": "RestSource",
                  "additionalColumns": [
                    {
                      "name": "PageNumber",
                      "value": {
                        "value": "@variables('pageNumber')",
                        "type": "Expression"
                      }
                    }
                  ]
                },
                "sink": { "type": "ParquetSink" }
              }
            },
            {
              "name": "CheckMorePages",
              "type": "SetVariable",
              "dependsOn": [
                {
                  "activity": "ProcessPage",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "variableName": "hasMorePages",
                "value": {
                  "value": "@activity('FetchPage').output.hasMorePages",
                  "type": "Expression"
                }
              }
            },
            {
              "name": "ComputeNextPage",
              "type": "SetVariable",
              "dependsOn": [
                {
                  "activity": "CheckMorePages",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "variableName": "nextPageNumber",
                "value": {
                  "value": "@string(add(int(variables('pageNumber')), 1))",
                  "type": "Expression"
                }
              }
            },
            {
              "name": "IncrementPage",
              "type": "SetVariable",
              "dependsOn": [
                {
                  "activity": "ComputeNextPage",
                  "dependencyConditions": ["Succeeded"]
                }
              ],
              "typeProperties": {
                "variableName": "pageNumber",
                "value": {
                  "value": "@variables('nextPageNumber')",
                  "type": "Expression"
                }
              }
            }
          ]
        }
      }
    ]
  }
}

Webhook Integration

{
  "name": "WebhookNotification",
  "type": "WebActivity",
  "typeProperties": {
    "method": "POST",
    "url": "https://hooks.slack.com/services/xxx/yyy/zzz",
    "headers": {
      "Content-Type": "application/json"
    },
    "body": {
      "type": "Expression",
      "value": "@json(concat('{\"text\": \"Pipeline ', pipeline().Pipeline, ' completed with status: ', pipeline().parameters.status, '\", \"channel\": \"#data-ops\"}'))"
    }
  }
}

Secure Input/Output

{
  "name": "SecureWebActivity",
  "type": "WebActivity",
  "policy": {
    "secureOutput": true,
    "secureInput": true
  },
  "typeProperties": {
    "method": "POST",
    "url": "https://api.service.com/sensitive-data",
    "headers": {
      "Authorization": {
        "type": "Expression",
        "value": "@concat('Bearer ', pipeline().parameters.sensitiveToken)"
      }
    },
    "body": {
      "type": "Expression",
      "value": "@pipeline().parameters.sensitivePayload"
    }
  }
}

Best Practices

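  1. Use Managed Identity: Where the target service supports it, authenticate with MSI so that no credentials need to be stored in the pipeline
  2. Set appropriate timeouts: Prevent hanging pipelines
  3. Handle pagination: For APIs returning large datasets
  4. Secure sensitive data: Set secureInput/secureOutput so that secrets and tokens are kept out of the activity monitoring logs
  5. Implement retry logic: Handle transient failures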

The Web Activity opens up endless integration possibilities, allowing Azure Data Factory to orchestrate workflows across diverse systems and services through REST APIs.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.