Skip to content
Back to Blog
1 min read

Microsoft Fabric AI Skills: Natural Language Analytics

I wrote “Microsoft Fabric AI Skills: Natural Language Analytics” to share practical, production-minded guidance on this topic.

Understanding AI Skills

AI Skills are configurable natural language interfaces to your Fabric data:

User Question → AI Skill → SQL Generation → Execution → Natural Language Response
                   ↓
            [Schema Context + Instructions + Guardrails]

Creating Your First AI Skill

Basic Setup

# AI Skills are created via Fabric portal or REST API
# Below shows the configuration pattern using REST API
from azure.identity import DefaultAzureCredential
import requests
import json

credential = DefaultAzureCredential()
token = credential.get_token("https://api.fabric.microsoft.com/.default").token
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
base_url = "https://api.fabric.microsoft.com/v1"

workspace_id = "your-workspace-id"

# AI Skill configuration
ai_skill_config = {
    "displayName": "SalesAnalyst",
    "type": "AISkill",
    "description": "Analyze sales data using natural language questions",

    # Configuration stored as part of the item definition
    "definition": {
        "dataSources": [{
            "type": "lakehouse",
            "workspace": "SalesAnalytics",
            "name": "SalesLakehouse",
            "tables": ["gold.sales_fact", "dim.products", "dim.customers", "dim.regions"]
        }],

        "instructions": """You are a sales data analyst.
        Help users understand sales performance, trends, and patterns.

        Key metrics:
        - Revenue: SUM(sales_fact.amount)
        - Units: SUM(sales_fact.quantity)
        - Average Order Value: AVG(sales_fact.amount)
        - Customer Count: COUNT(DISTINCT customer_id)

        When answering:
        1. Always include the time period in your response
        2. Compare to previous periods when relevant
        3. Highlight notable trends or anomalies
        4. Suggest follow-up questions""",

        "examples": [
            {
                "question": "What were our sales last month?",
                "sql": """
                    SELECT
                        SUM(amount) as total_revenue,
                        SUM(quantity) as total_units,
                        COUNT(DISTINCT customer_id) as unique_customers
                    FROM gold.sales_fact
                    WHERE date >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
                      AND date < DATE_TRUNC('month', CURRENT_DATE)
                """
            },
            {
                "question": "Top 5 products by revenue",
                "sql": """
                    SELECT
                        p.product_name,
                        SUM(s.amount) as revenue
                    FROM gold.sales_fact s
                    JOIN dim.products p ON s.product_id = p.product_id
                    GROUP BY p.product_name
                    ORDER BY revenue DESC
                    LIMIT 5
                """
            }
        ]
    }
}

# Create AI Skill via REST API
response = requests.post(
    f"{base_url}/workspaces/{workspace_id}/items",
    headers=headers,
    json=ai_skill_config
)
print(f"AI Skill created: {response.json()}")

Adding Guardrails

# Guardrails are configured as part of the AI Skill definition
# These are applied when the skill processes queries

guardrails_config = {
    # Row-level security based on user
    "rowFilters": [{
        "table": "gold.sales_fact",
        "condition": "region_id IN (SELECT region_id FROM user_regions WHERE user_email = '{user_email}')"
    }],

    # Column masking for sensitive data
    "columnMasks": [{
        "table": "dim.customers",
        "column": "email",
        "maskFunction": "CONCAT(LEFT(email, 3), '***@', SUBSTRING_INDEX(email, '@', -1))"
    }],

    # Query restrictions
    "restrictions": [
        {
            "type": "row_limit",
            "config": {"max_rows": 10000}
        },
        {
            "type": "blocked_operations",
            "config": {"operations": ["DELETE", "UPDATE", "INSERT", "DROP", "ALTER"]}
        },
        {
            "type": "blocked_tables",
            "config": {"tables": ["raw.*", "staging.*", "pii.*"]}
        },
        {
            "type": "query_timeout",
            "config": {"timeout_seconds": 30}
        }
    ]
}

# Update AI Skill with guardrails via REST API
skill_id = "your-skill-id"
update_payload = {
    "definition": {
        "guardrails": guardrails_config
    }
}

response = requests.patch(
    f"{base_url}/workspaces/{workspace_id}/items/{skill_id}",
    headers=headers,
    json=update_payload
)
print(f"Guardrails added: {response.status_code}")

Advanced Schema Configuration

Semantic Layer Integration

# Semantic layer configuration for AI Skills
# This metadata helps the AI generate better queries

semantic_layer_config = {
    "name": "SalesSemantics",

    "measures": [
        {
            "name": "Total Revenue",
            "expression": "SUM(sales_fact.amount)",
            "format": "currency",
            "description": "Total sales revenue in USD"
        },
        {
            "name": "Units Sold",
            "expression": "SUM(sales_fact.quantity)",
            "format": "number",
            "description": "Total number of units sold"
        },
        {
            "name": "Average Order Value",
            "expression": "AVG(sales_fact.amount)",
            "format": "currency",
            "description": "Average revenue per order"
        },
        {
            "name": "Year-over-Year Growth",
            "expression": """
                (SUM(CASE WHEN YEAR(date) = YEAR(CURRENT_DATE) THEN amount END) -
                 SUM(CASE WHEN YEAR(date) = YEAR(CURRENT_DATE) - 1 THEN amount END)) /
                NULLIF(SUM(CASE WHEN YEAR(date) = YEAR(CURRENT_DATE) - 1 THEN amount END), 0) * 100
            """,
            "format": "percentage",
            "description": "Year-over-year revenue growth"
        }
    ],

    "dimensions": [
        {
            "name": "Product Category",
            "expression": "products.category",
            "hierarchy": ["products.category", "products.subcategory", "products.product_name"]
        },
        {
            "name": "Region",
            "expression": "regions.region_name",
            "hierarchy": ["regions.country", "regions.region_name", "regions.city"]
        },
        {
            "name": "Time",
            "expression": "sales_fact.date",
            "hierarchy": ["YEAR(date)", "QUARTER(date)", "MONTH(date)", "date"]
        }
    ],

    "relationships": [
        {"from": "sales_fact.product_id", "to": "products.product_id"},
        {"from": "sales_fact.customer_id", "to": "customers.customer_id"},
        {"from": "sales_fact.region_id", "to": "regions.region_id"}
    ]
}

# Add semantic layer to AI Skill via REST API update
update_payload = {
    "definition": {
        "semanticLayer": semantic_layer_config
    }
}

response = requests.patch(
    f"{base_url}/workspaces/{workspace_id}/items/{skill_id}",
    headers=headers,
    json=update_payload
)

Custom Functions

# Custom SQL functions are created in the Lakehouse/Warehouse
# and referenced in the AI Skill configuration

from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

# Create custom functions in the Lakehouse SQL endpoint
spark.sql("""
    CREATE OR REPLACE FUNCTION get_fiscal_quarter(d DATE)
    RETURNS STRING
    RETURN CONCAT('FY', YEAR(d) + CASE WHEN MONTH(d) >= 4 THEN 1 ELSE 0 END,
                  ' Q', CAST(CEIL((MONTH(d) + 9) % 12 / 3.0) AS INT))
""")

spark.sql("""
    CREATE OR REPLACE FUNCTION calculate_moving_average(metric_value DOUBLE, partition_col STRING, order_col DATE)
    RETURNS DOUBLE
    RETURN AVG(metric_value) OVER (
        PARTITION BY partition_col
        ORDER BY order_col
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    )
""")

# Reference these functions in the AI Skill configuration
custom_functions_config = [
    {
        "name": "get_fiscal_quarter",
        "description": "Convert date to fiscal quarter (FY starts April)",
        "usage": "get_fiscal_quarter(date_column)"
    },
    {
        "name": "calculate_moving_average",
        "description": "Calculate 7-day moving average",
        "usage": "calculate_moving_average(value, partition_col, date_col)"
    }
]

# Update AI Skill to include function documentation
update_payload = {
    "definition": {
        "customFunctions": custom_functions_config
    }
}

response = requests.patch(
    f"{base_url}/workspaces/{workspace_id}/items/{skill_id}",
    headers=headers,
    json=update_payload
)

Using AI Skills

Direct Query Interface

# Query AI Skills via REST API
from azure.identity import DefaultAzureCredential
import requests

credential = DefaultAzureCredential()
token = credential.get_token("https://api.fabric.microsoft.com/.default").token
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

workspace_id = "your-workspace-id"
skill_id = "your-skill-id"

def ask_ai_skill(question: str, context: dict = None) -> dict:
    """Query an AI Skill with a natural language question."""
    payload = {
        "question": question
    }
    if context:
        payload["context"] = context

    response = requests.post(
        f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{skill_id}/query",
        headers=headers,
        json=payload
    )
    return response.json()

# Simple question
result = ask_ai_skill(
    question="How did we perform last quarter compared to the same quarter last year?"
)

print(f"Answer: {result.get('answer')}")
print(f"SQL: {result.get('sqlQuery')}")
print(f"Data: {result.get('data')}")
print(f"Confidence: {result.get('confidenceScore')}")

# With context
result = ask_ai_skill(
    question="Show me the trend",
    context={
        "previous_question": "What are our top 5 products?",
        "previous_result": previous_result.get('data'),
        "filters": {"region": "North America"}
    }
)

Conversational Interface

# Conversational interface using session-based queries
import uuid

class AISkillConversation:
    def __init__(self, workspace_id: str, skill_id: str):
        self.workspace_id = workspace_id
        self.skill_id = skill_id
        self.session_id = str(uuid.uuid4())
        self.history = []

    def ask(self, question: str) -> dict:
        """Ask a question in the conversation context."""
        payload = {
            "question": question,
            "sessionId": self.session_id,
            "conversationHistory": self.history
        }

        response = requests.post(
            f"https://api.fabric.microsoft.com/v1/workspaces/{self.workspace_id}/items/{self.skill_id}/query",
            headers=headers,
            json=payload
        )

        result = response.json()
        self.history.append({"question": question, "answer": result.get("answer")})
        return result

    def summarize(self) -> str:
        """Get a summary of the conversation."""
        return "\n".join([f"Q: {h['question']}\nA: {h['answer']}" for h in self.history])

# Start a conversation
conversation = AISkillConversation(workspace_id, skill_id)

# Multi-turn interaction
response1 = conversation.ask("What were our sales last month?")
print(response1.get("answer"))

response2 = conversation.ask("Break that down by product category")
print(response2.get("answer"))

response3 = conversation.ask("Which category grew the most vs previous month?")
print(response3.get("answer"))

# Get conversation summary
summary = conversation.summarize()
print(summary)

Integration with Power BI

# Use Semantic Link for Power BI integration with AI Skills
import sempy.fabric as fabric
from openai import AzureOpenAI

# Azure OpenAI client for DAX generation
aoai_client = AzureOpenAI(
    azure_endpoint="https://your-aoai.openai.azure.com/",
    api_version="2024-02-01"
)

def generate_dax_from_natural_language(question: str, workspace: str, dataset: str) -> dict:
    """Generate DAX from natural language using AI and Semantic Link."""

    # Get model metadata using Semantic Link
    tables = fabric.list_tables(dataset=dataset, workspace=workspace)
    measures = fabric.list_measures(dataset=dataset, workspace=workspace)

    model_context = f"Tables: {tables.to_dict()}\nMeasures: {measures.to_dict()}"

    # Generate DAX using Azure OpenAI
    response = aoai_client.chat.completions.create(
        model="gpt-4o",
        messages=[{
            "role": "system",
            "content": f"You are a DAX expert. Generate a DAX measure based on the model:\n{model_context}"
        }, {
            "role": "user",
            "content": question
        }]
    )

    dax_code = response.choices[0].message.content

    return {
        "measure_code": dax_code,
        "suggested_visual": "Bar chart with category on axis and measure as value"
    }

# Generate DAX from natural language
dax = generate_dax_from_natural_language(
    question="Show revenue by category with year-over-year comparison",
    workspace="SalesAnalytics",
    dataset="SalesModel"
)

print(f"DAX Measure:\n{dax['measure_code']}")
print(f"Visual Suggestion: {dax['suggested_visual']}")

Monitoring and Optimization

# Monitor AI Skills using Fabric REST API and custom logging
from azure.identity import DefaultAzureCredential
import requests
from datetime import datetime, timedelta

credential = DefaultAzureCredential()
token = credential.get_token("https://api.fabric.microsoft.com/.default").token
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

def get_skill_usage(workspace_id: str, skill_id: str, days: int = 30) -> dict:
    """Get usage metrics for an AI Skill."""
    # Query activity logs via Fabric Admin API
    start_date = (datetime.now() - timedelta(days=days)).isoformat()

    response = requests.get(
        f"https://api.fabric.microsoft.com/v1/admin/activityevents",
        headers=headers,
        params={
            "startDateTime": start_date,
            "endDateTime": datetime.now().isoformat(),
            "filter": f"itemId eq '{skill_id}'"
        }
    )

    events = response.json().get("activityEventEntities", [])

    # Aggregate metrics
    total_questions = len(events)
    unique_users = len(set(e.get("userId") for e in events))
    successful = sum(1 for e in events if e.get("status") == "Succeeded")
    success_rate = (successful / total_questions * 100) if total_questions > 0 else 0

    return {
        "total_questions": total_questions,
        "unique_users": unique_users,
        "success_rate": round(success_rate, 1),
        "events": events
    }

# Get usage metrics
usage = get_skill_usage(workspace_id, skill_id, days=30)
print(f"Total questions: {usage['total_questions']}")
print(f"Unique users: {usage['unique_users']}")
print(f"Success rate: {usage['success_rate']}%")

# Analyze failed questions from logs
failed_events = [e for e in usage["events"] if e.get("status") == "Failed"]
for event in failed_events[:20]:
    print(f"Question: {event.get('parameters', {}).get('question')}")
    print(f"Error: {event.get('errorMessage')}")
    print("---")

AI Skills democratize data access while maintaining governance. Start with well-defined domains and expand based on user feedback.

Resources

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.