Back to Blog
5 min read

Azure AI Foundry: Getting Started Guide for Data Professionals

Azure AI Foundry is Microsoft’s unified platform for building AI applications. For data professionals, it provides the infrastructure to add AI capabilities to data pipelines and analytics. Let’s get started.

What is Azure AI Foundry?

Azure AI Foundry brings together:

  • Model deployment and management
  • Prompt engineering tools
  • Evaluation frameworks
  • Agent development
  • Enterprise security and compliance

Setting Up Your First Project

Create an AI Foundry Project

from azure.ai.foundry import AIFoundryClient
from azure.identity import DefaultAzureCredential

# Initialize client.
# DefaultAzureCredential walks the standard credential chain (env vars,
# managed identity, Azure CLI login, ...) so the same code works locally
# and in production without hard-coding secrets.
credential = DefaultAzureCredential()
client = AIFoundryClient(
    subscription_id="your-subscription-id",   # replace with your subscription ID
    resource_group="your-resource-group",     # replace with your resource group
    credential=credential
)

# Create a new project to hold deployments, prompts, and evaluations.
project = client.projects.create(
    name="data-analytics-ai",
    description="AI capabilities for data analytics workflows",
    location="eastus"   # choose a region that offers the models you plan to deploy
)

print(f"Project created: {project.id}")

Deploy a Model

from azure.ai.foundry.models import ModelDeployment

# Deploy GPT-4o for chat completions. The deployment *name* ("gpt-4o-main"),
# not the model name, is what later SDK calls reference.
deployment = client.deployments.create(
    project=project.name,
    name="gpt-4o-main",
    model="gpt-4o",
    sku="Standard",
    capacity=10  # Tokens per minute (in thousands)
)

# Deploy an embedding model — used later for RAG / vector search.
# Embeddings are cheap per token, so a higher capacity is reasonable.
embedding_deployment = client.deployments.create(
    project=project.name,
    name="embeddings",
    model="text-embedding-3-large",
    sku="Standard",
    capacity=50
)

print(f"Deployments ready: {deployment.endpoint}")

Using the SDK

Chat Completions

from azure.ai.foundry import AIFoundryClient
from azure.identity import DefaultAzureCredential

# Project-scoped client: all calls below resolve deployment names
# within this project.
client = AIFoundryClient(
    project="data-analytics-ai",
    credential=DefaultAzureCredential()
)

# Simple chat completion
response = client.chat.complete(
    deployment="gpt-4o-main",   # the deployment name, not the model name
    messages=[
        {"role": "system", "content": "You are a data analytics expert."},
        {"role": "user", "content": "Explain the difference between data lake and data warehouse."}
    ],
    temperature=0.7,   # moderate creativity; lower this for more deterministic answers
    max_tokens=500     # caps the response length, not the prompt
)

print(response.choices[0].message.content)

Streaming Responses

# Stream the reply token-by-token rather than waiting for the full response —
# useful for long generations where first-token latency matters.
prompt = [
    {"role": "user", "content": "Write a comprehensive guide to data modeling best practices."}
]

stream = client.chat.complete(
    deployment="gpt-4o-main",
    messages=prompt,
    stream=True,
)

# Print each fragment as it arrives; empty/None deltas are skipped.
for chunk in stream:
    delta_text = chunk.choices[0].delta.content
    if delta_text:
        print(delta_text, end="")

Embeddings for RAG

# Generate embeddings — one vector per input string, returned in input order.
embeddings = client.embeddings.create(
    deployment="embeddings",   # the embedding deployment created earlier
    input=[
        "Microsoft Fabric is a unified analytics platform.",
        "Azure Synapse provides big data analytics.",
        "Power BI enables business intelligence."
    ]
)

# Each item carries the raw vector; text-embedding-3-large defaults to
# 3072 dimensions — TODO confirm against the deployment configuration.
for i, embedding in enumerate(embeddings.data):
    print(f"Text {i}: {len(embedding.embedding)} dimensions")

# Use with vector search
import numpy as np

def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Args:
        a: 1-D array-like of floats.
        b: 1-D array-like of floats, same length as ``a``.

    Returns:
        float in [-1.0, 1.0]. Returns 0.0 when either vector has zero
        magnitude — the naive formula would divide by zero and yield NaN,
        which silently breaks downstream argmax-based ranking.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0.0:
        return 0.0  # a zero vector has no direction; define similarity as 0
    return float(np.dot(a, b) / denom)

# Embed the query with the SAME deployment used for the documents —
# vectors produced by different models are not comparable.
query_embedding = client.embeddings.create(
    deployment="embeddings",
    input=["What is Fabric?"]
).data[0].embedding

# Find most similar document via brute-force scan. Fine for a handful of
# documents; use a vector index (e.g. Azure AI Search) for real corpora.
similarities = [
    cosine_similarity(query_embedding, doc.embedding)
    for doc in embeddings.data
]
most_similar = np.argmax(similarities)  # index into the original input list
print(f"Most relevant document: {most_similar}")

Building a Data Assistant

Define Tools for Data Operations

from azure.ai.foundry.tools import Tool

# Define tools the AI can use
# Define tools the AI can use. Each Tool's `parameters` field is a JSON
# Schema object — the model uses `description` text to decide when and how
# to call each tool, so keep descriptions precise.
tools = [
    # Run arbitrary SQL — see execute_tool below for the local implementation.
    Tool(
        name="query_warehouse",
        description="Execute a SQL query against the data warehouse",
        parameters={
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The SQL query to execute"
                }
            },
            "required": ["query"]
        }
    ),
    # Schema lookup — lets the model discover columns before writing SQL.
    Tool(
        name="get_table_schema",
        description="Get the schema of a database table",
        parameters={
            "type": "object",
            "properties": {
                "table_name": {
                    "type": "string",
                    "description": "Name of the table"
                }
            },
            "required": ["table_name"]
        }
    ),
    # Charting — `title` is optional (not listed in `required`).
    Tool(
        name="create_visualization",
        description="Create a chart from data",
        parameters={
            "type": "object",
            "properties": {
                "chart_type": {"type": "string", "enum": ["bar", "line", "pie", "scatter"]},
                "data": {"type": "string", "description": "JSON data for the chart"},
                "title": {"type": "string"}
            },
            "required": ["chart_type", "data"]
        }
    )
]

# Implement tool functions
def execute_tool(tool_name: str, arguments: dict):
    """Dispatch a model-requested tool call to its local implementation.

    Args:
        tool_name: Name of the tool the model asked for; must match one of
            the Tool definitions advertised to the model.
        arguments: Parsed arguments for the tool call.

    Returns:
        The tool's result, which the caller sends back to the model as a
        "tool" message.

    Raises:
        ValueError: If ``tool_name`` matches no implemented tool. Failing
            loudly here matters: a silent None would be stringified to
            "None" and quietly corrupt the conversation.
    """
    if tool_name == "query_warehouse":
        # Execute against your warehouse
        return execute_sql(arguments["query"])
    if tool_name == "get_table_schema":
        return get_schema(arguments["table_name"])
    if tool_name == "create_visualization":
        return create_chart(arguments)
    raise ValueError(f"Unknown tool: {tool_name}")

Tool-Using Conversation

# Start conversation with tools
messages = [
    {"role": "system", "content": """You are a data analyst assistant.
    Use the available tools to help users analyze data.
    Always explain what you're doing."""},
    {"role": "user", "content": "What are the top 5 products by sales last month?"}
]

response = client.chat.complete(
    deployment="gpt-4o-main",
    messages=messages,
    tools=[t.to_dict() for t in tools],
    tool_choice="auto"
)

# Process tool calls
while response.choices[0].finish_reason == "tool_calls":
    tool_calls = response.choices[0].message.tool_calls

    # Add assistant message with tool calls
    messages.append(response.choices[0].message.to_dict())

    # Execute each tool and add results
    for tool_call in tool_calls:
        result = execute_tool(tool_call.function.name, tool_call.function.arguments)
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": str(result)
        })

    # Continue conversation
    response = client.chat.complete(
        deployment="gpt-4o-main",
        messages=messages,
        tools=[t.to_dict() for t in tools]
    )

print(response.choices[0].message.content)

Evaluation Framework

from azure.ai.foundry.evaluation import Evaluator, Dataset

# Create evaluation dataset
# Create evaluation dataset: each example pairs an input with a reference
# answer and the context the response should be grounded in.
eval_dataset = Dataset(
    name="data_assistant_eval",
    examples=[
        {
            "input": "What is a data lakehouse?",
            "expected": "A data lakehouse combines data lake and warehouse features...",
            "context": "Data architecture documentation"
        },
        {
            "input": "How do I optimize a slow query?",
            "expected": "To optimize a slow query: 1. Check execution plan...",
            "context": "Query optimization guide"
        }
    ]
)

# Run evaluation against the live deployment.
evaluator = Evaluator(client)

# Built-in quality metrics are typically scored by a judge model on a
# 1-5 scale — TODO confirm the scale for this SDK before setting thresholds.
results = evaluator.evaluate(
    deployment="gpt-4o-main",
    dataset=eval_dataset,
    metrics=["relevance", "coherence", "groundedness", "fluency"],
    system_prompt="You are a data analytics expert."
)

print(f"Relevance: {results.metrics['relevance']:.2f}")
print(f"Coherence: {results.metrics['coherence']:.2f}")
print(f"Groundedness: {results.metrics['groundedness']:.2f}")
print(f"Fluency: {results.metrics['fluency']:.2f}")

Prompt Management

from azure.ai.foundry.prompts import PromptTemplate, PromptLibrary

# Create reusable prompts
sql_generator = PromptTemplate(
    name="sql_generator",
    template="""Given the following table schemas:
{{schemas}}

Generate a SQL query to answer: {{question}}

Requirements:
- Use standard SQL syntax
- Include appropriate JOINs if needed
- Add comments explaining the query
- Return only the SQL, no explanations

SQL Query:""",
    variables=["schemas", "question"]
)

# Save to library
library = PromptLibrary(project="data-analytics-ai")
library.save(sql_generator)

# Use the prompt
response = client.chat.complete(
    deployment="gpt-4o-main",
    messages=[
        {"role": "user", "content": sql_generator.render(
            schemas=table_schemas,
            question="What are the top customers by revenue?"
        )}
    ]
)

Best Practices

  1. Start with evaluation: Define success metrics before building
  2. Use system prompts: Guide model behavior consistently
  3. Implement guardrails: Validate inputs and outputs
  4. Monitor costs: Track token usage and optimize
  5. Version prompts: Treat prompts as code

Azure AI Foundry provides the foundation for enterprise AI applications. Start with simple use cases, measure results, and expand from there.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.