Skip to content
Back to Blog
1 min read

Debug Sessions for Cognitive Search Skillsets

I wrote “Debug Sessions for Cognitive Search Skillsets” to share practical, production-minded guidance on this topic.

Understanding Debug Sessions

Debug sessions provide:

  • Real-time skillset execution testing
  • Step-through debugging of individual skills
  • Input/output inspection at each stage
  • Immediate feedback on changes

Creating a Debug Session

import requests
import json

def create_debug_session(endpoint, api_key, indexer_name, document_key=None,
                         storage_connection_string=None):
    """Create a debug session for skillset testing.

    Args:
        endpoint: Service base URL; ``/debugSessions`` is appended to it.
        api_key: Value sent in the ``api-key`` request header.
        indexer_name: Indexer to debug. The session is named
            ``debug-<indexer_name>``.
        document_key: Optional key of a specific document to debug. Left out
            of the request body when falsy.
        storage_connection_string: Connection string for the storage used
            for session data. When omitted, a module-level variable named
            ``storage_connection_string`` is used if one is defined.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If no storage connection string was passed and no
            module-level ``storage_connection_string`` exists.
    """
    # The request body needs a storage connection string, but this function
    # previously read it from a name that was never defined, which raised
    # NameError. Accept it explicitly and keep the old global lookup only as
    # a fallback for scripts that did define that variable.
    if storage_connection_string is None:
        module_globals = globals()
        if "storage_connection_string" not in module_globals:
            raise ValueError(
                "storage_connection_string is required to create a debug session"
            )
        storage_connection_string = module_globals["storage_connection_string"]

    url = f"{endpoint}/debugSessions?api-version=2021-04-30-Preview"
    headers = {
        "api-key": api_key,
        "Content-Type": "application/json"
    }

    body = {
        "name": f"debug-{indexer_name}",
        "indexerName": indexer_name,
        "storageConnectionString": storage_connection_string,  # For session data
    }

    if document_key:
        body["documentKey"] = document_key  # Debug specific document

    response = requests.post(url, headers=headers, json=body)
    return response.json()

# Open a debug session for one document and keep its id for the calls below
session = create_debug_session(
    endpoint,
    api_key,
    indexer_name="my-indexer",
    document_key="doc-123",
)
session_id = session["sessionId"]
print(f"Debug session created: {session_id}")

Running Debug Session

def run_debug_session(endpoint, api_key, session_id):
    """POST to the session's ``/run`` route and return the decoded JSON reply.

    Args:
        endpoint: Service base URL that the session path is appended to.
        api_key: Value sent in the ``api-key`` request header.
        session_id: Identifier of the debug session to execute.
    """
    run_url = f"{endpoint}/debugSessions/{session_id}/run?api-version=2021-04-30-Preview"
    reply = requests.post(run_url, headers={"api-key": api_key})
    return reply.json()

def get_session_status(endpoint, api_key, session_id):
    """GET the debug session resource and return the decoded JSON reply.

    Args:
        endpoint: Service base URL that the session path is appended to.
        api_key: Value sent in the ``api-key`` request header.
        session_id: Identifier of the debug session to look up.
    """
    session_url = f"{endpoint}/debugSessions/{session_id}?api-version=2021-04-30-Preview"
    reply = requests.get(session_url, headers={"api-key": api_key})
    return reply.json()

# Run and get results
run_debug_session(endpoint, api_key, session_id)

# Poll for completion. The loop is bounded by a deadline so a session that
# never reports "completed" or "failed" cannot hang the script forever.
import time

POLL_INTERVAL_SECONDS = 5
POLL_TIMEOUT_SECONDS = 600

# monotonic() is unaffected by wall-clock adjustments while we wait
poll_deadline = time.monotonic() + POLL_TIMEOUT_SECONDS
while True:
    status = get_session_status(endpoint, api_key, session_id)
    if status["status"] in ["completed", "failed"]:
        break
    if time.monotonic() >= poll_deadline:
        raise TimeoutError(
            f"Debug session {session_id} did not finish within "
            f"{POLL_TIMEOUT_SECONDS} seconds (last status: {status['status']})"
        )
    time.sleep(POLL_INTERVAL_SECONDS)

print(f"Session status: {status['status']}")

Inspecting Skill Execution

def get_skill_execution_details(endpoint, api_key, session_id):
    """GET the session's ``/executionInfo`` route and return the decoded JSON.

    Args:
        endpoint: Service base URL that the session path is appended to.
        api_key: Value sent in the ``api-key`` request header.
        session_id: Identifier of the debug session to inspect.
    """
    info_url = f"{endpoint}/debugSessions/{session_id}/executionInfo?api-version=2021-04-30-Preview"
    reply = requests.get(info_url, headers={"api-key": api_key})
    return reply.json()

def analyze_skill_results(execution_info):
    """Print a report for every entry under ``skillExecutionInfo``.

    For each skill the name, status and duration are printed, followed by
    its inputs and outputs (each value shortened to 100 characters plus
    ``...`` when longer) and the message of every reported error.

    Args:
        execution_info: Mapping that may contain a ``skillExecutionInfo``
            list; a missing key is treated as an empty list.
    """

    def shorten(value):
        # Values longer than 100 characters are cut and marked with "..."
        text = str(value)
        if len(text) > 100:
            return text[:100] + "..."
        return text

    for entry in execution_info.get("skillExecutionInfo", []):
        skill_name = entry.get("skillName")
        status = entry.get("status")
        duration = entry.get("durationMs")

        print(f"\nSkill: {skill_name}")
        print(f"  Status: {status}")
        print(f"  Duration: {duration}ms")

        # What the skill received
        for input_name, raw_input in entry.get("inputs", {}).items():
            value_preview = shorten(raw_input)
            print(f"  Input '{input_name}': {value_preview}")

        # What the skill produced
        for output_name, raw_output in entry.get("outputs", {}).items():
            value_preview = shorten(raw_output)
            print(f"  Output '{output_name}': {value_preview}")

        # Anything the skill reported as an error
        for error in entry.get("errors", []):
            print(f"  ERROR: {error.get('message')}")

# Fetch the per-skill execution details for the session, then print the report
execution_info = get_skill_execution_details(
    endpoint=endpoint,
    api_key=api_key,
    session_id=session_id,
)
analyze_skill_results(execution_info=execution_info)

Testing Skill Changes

def test_skill_change(endpoint, api_key, session_id, skill_definition):
    """PUT a skill definition into the session, re-run it, and fetch results.

    Args:
        endpoint: Service base URL that the session path is appended to.
        api_key: Value sent in the ``api-key`` request header.
        session_id: Identifier of the debug session to modify.
        skill_definition: JSON-serialisable skill definition sent as the
            request body.

    Returns:
        The execution details returned by ``get_skill_execution_details``
        when the PUT answers with status 200, otherwise ``None``.
    """
    put_reply = requests.put(
        f"{endpoint}/debugSessions/{session_id}/skills?api-version=2021-04-30-Preview",
        headers={"api-key": api_key, "Content-Type": "application/json"},
        json=skill_definition,
    )

    # Anything other than 200 means the change was not applied
    if put_reply.status_code != 200:
        return None

    # Re-run the session so the results reflect the new skill
    run_debug_session(endpoint, api_key, session_id)
    return get_skill_execution_details(endpoint, api_key, session_id)

# Try out a modified entity recognition skill inside the debug session
modified_skill = {
    "@odata.type": "#Microsoft.Skills.Text.V3.EntityRecognitionSkill",
    "name": "entity-recognition",
    "context": "/document",
    # Added categories
    "categories": ["Person", "Organization", "Location", "DateTime", "Quantity"],
    "inputs": [{"name": "text", "source": "/document/content"}],
    # Every output is written to a target with the same name
    "outputs": [
        {"name": output, "targetName": output}
        for output in (
            "persons",
            "organizations",
            "locations",
            "dateTimes",
            "quantities",
        )
    ],
}

result = test_skill_change(
    endpoint,
    api_key,
    session_id,
    skill_definition=modified_skill,
)

Debugging Common Issues

class SkillsetDebugger:
    """Helper class for debugging skillset issues.

    Holds the endpoint and API key and passes them to the module-level
    ``get_skill_execution_details`` helper, then inspects the returned
    ``skillExecutionInfo`` entries.
    """

    def __init__(self, endpoint, api_key):
        """Store the endpoint and API key used for every lookup."""
        self.endpoint = endpoint
        self.api_key = api_key

    @staticmethod
    def _is_empty(value):
        """Return True for ``None`` and for zero-length strings/containers.

        Numeric zero and ``False`` are real values rather than missing data,
        so they are deliberately not treated as empty.
        """
        if value is None:
            return True
        return isinstance(value, (str, list, tuple, dict, set)) and len(value) == 0

    def diagnose_empty_outputs(self, session_id):
        """Diagnose why skills produce empty outputs.

        Args:
            session_id: Identifier of the debug session to inspect.

        Returns:
            A list with one dict per empty output, holding the keys
            ``skill``, ``output``, ``input_received`` and ``possible_causes``.
            An output counts as empty when it is ``None`` or a zero-length
            string or container.
        """
        execution_info = get_skill_execution_details(
            self.endpoint, self.api_key, session_id
        )

        issues = []
        for skill in execution_info.get("skillExecutionInfo", []):
            for output_name, output_value in skill.get("outputs", {}).items():
                if not self._is_empty(output_value):
                    continue
                issues.append({
                    # .get() keeps a result without a name from raising
                    # KeyError, matching _get_empty_output_causes.
                    "skill": skill.get("skillName"),
                    "output": output_name,
                    "input_received": skill.get("inputs", {}),
                    "possible_causes": self._get_empty_output_causes(skill)
                })

        return issues

    def _get_empty_output_causes(self, skill_result):
        """Identify possible causes for empty output.

        Args:
            skill_result: One entry of ``skillExecutionInfo``.

        Returns:
            A list of human-readable cause strings; empty when nothing
            suspicious was found.
        """
        # An input that is itself empty is the most direct explanation
        causes = [
            f"Empty input: {input_name}"
            for input_name, input_value in skill_result.get("inputs", {}).items()
            if self._is_empty(input_value)
        ]

        # Heuristic keyed on the skill name only: skills with "text" in
        # their name get a generic hint about short or unsupported text.
        skill_name = skill_result.get("skillName", "")
        if "text" in skill_name.lower():
            causes.append("Text may be too short or in unsupported language")

        # Any reported execution error is also a candidate cause
        if skill_result.get("errors"):
            causes.append("Skill execution errors present")

        return causes

    def validate_skill_chain(self, session_id):
        """Validate that skill inputs/outputs chain correctly.

        Walks the skills in the order they are returned and reports every
        ``inputMappings`` source that is neither ``/document`` nor the
        ``<context>/<output name>`` path of an earlier skill's output.

        NOTE(review): only the ``/document`` root is pre-seeded, so a source
        that points at a field of the source document (for example
        ``/document/content``) is reported too unless an earlier skill
        produced that exact path - confirm whether that is intended.

        Args:
            session_id: Identifier of the debug session to inspect.

        Returns:
            A list of dicts with the keys ``skill`` and ``issue``.
        """
        execution_info = get_skill_execution_details(
            self.endpoint, self.api_key, session_id
        )

        # Root is always available
        available_outputs = {"/document"}

        issues = []
        for skill in execution_info.get("skillExecutionInfo", []):
            skill_name = skill.get("skillName")

            # Check inputs are available
            for input_name, source in skill.get("inputMappings", {}).items():
                if source not in available_outputs:
                    issues.append({
                        "skill": skill_name,
                        "issue": f"Input '{input_name}' references unavailable source: {source}"
                    })

            # Add outputs to available so later skills may consume them
            context = skill.get("context", "/document")
            for output_name in skill.get("outputs", {}):
                available_outputs.add(f"{context}/{output_name}")

        return issues

# Usage: bind the endpoint and key once, then run both checks on the session
debugger = SkillsetDebugger(endpoint=endpoint, api_key=api_key)

# Report every output that came back empty, with the likely reasons
empty_issues = debugger.diagnose_empty_outputs(session_id=session_id)
for issue in empty_issues:
    print(f"Empty output in {issue['skill']}.{issue['output']}")
    print(f"  Possible causes: {issue['possible_causes']}")

# Report inputs whose source path was not found among the available outputs
chain_issues = debugger.validate_skill_chain(session_id=session_id)
for issue in chain_issues:
    print(f"Chain issue: {issue}")

Cleanup

def delete_debug_session(endpoint, api_key, session_id):
    """Send a DELETE for the debug session and report whether it answered 204.

    Args:
        endpoint: Service base URL that the session path is appended to.
        api_key: Value sent in the ``api-key`` request header.
        session_id: Identifier of the debug session to delete.

    Returns:
        ``True`` when the response status code is 204, otherwise ``False``.
    """
    session_url = f"{endpoint}/debugSessions/{session_id}?api-version=2021-04-30-Preview"
    reply = requests.delete(session_url, headers={"api-key": api_key})
    return reply.status_code == 204

# Clean up: delete the debug session once the walkthrough is finished
delete_debug_session(endpoint=endpoint, api_key=api_key, session_id=session_id)

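Debug sessions are essential for developing and troubleshooting complex AI enrichment pipelines efficiently.

## Takeaways

  • Pass a document key when creating a session so you debug one specific document.
  • After each run, inspect every skill's inputs, outputs, and errors to see where data goes missing.
  • Try skill changes inside the session and re-run it to see their effect.
  • Delete the debug session when you are done.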

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.