August 30, 2022 1 min read

Building Skillsets in Azure Cognitive Search

Azure Cognitive Search Skillsets AI Enrichment NLP

Skillsets define AI enrichment pipelines in Azure Cognitive Search. They chain together cognitive skills to extract insights from your content during indexing.

Skillset Architecture

A skillset consists of:

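Skills: Individual AI operations, such as language detection or OCR
Inputs: Paths in the enriched document (for example, /document/content) that a skill reads from
Outputs: Named results that a skill writes back, available for indexing or as inputs to later skills
Context: The scope at which a skill runs: once per document (/document) or once per item in a collection (for example, /document/pages/*)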

Built-in Cognitive Skills

from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import (
    CognitiveServicesAccountKey,
    EntityLinkingSkill,
    EntityRecognitionSkill,
    ImageAnalysisSkill,
    InputFieldMappingEntry,
    KeyPhraseExtractionSkill,
    LanguageDetectionSkill,
    MergeSkill,
    OcrSkill,
    OutputFieldMappingEntry,
    PIIDetectionSkill,
    SearchIndexerSkillset,
    SentimentSkill,
    SentimentSkillVersion,
    SplitSkill,
    TextTranslationSkill,
)

# Connection settings for the search service; the admin key is a placeholder.
endpoint = "https://mysearchservice.search.windows.net"
credential = AzureKeyCredential("your-admin-key")

# Client through which skillsets are created or updated on the service.
indexer_client = SearchIndexerClient(endpoint, credential)

# Language Detection: reads the document text and stores the detected
# language code as "language" at the document level.
language_skill = LanguageDetectionSkill(
    name="language-detection",
    description="Detect document language",
    context="/document",
    inputs=[InputFieldMappingEntry(source="/document/content", name="text")],
    outputs=[OutputFieldMappingEntry(target_name="language", name="languageCode")],
)

# Entity Recognition: extracts named entities from the document text, using
# the language code stored at /document/language and falling back to "en".
entity_skill = EntityRecognitionSkill(
    name="entity-recognition",
    description="Extract named entities",
    context="/document",
    categories=["Person", "Organization", "Location", "DateTime", "URL", "Email"],
    default_language_code="en",
    inputs=[
        InputFieldMappingEntry(name="text", source="/document/content"),
        InputFieldMappingEntry(name="languageCode", source="/document/language"),
    ],
    # One output per requested category. "dateTimes" was previously missing,
    # so the DateTime entities requested above were extracted but discarded.
    outputs=[
        OutputFieldMappingEntry(name=field, target_name=field)
        for field in (
            "persons",
            "organizations",
            "locations",
            "dateTimes",
            "urls",
            "emails",
        )
    ],
)

# Key Phrase Extraction: at most 10 key phrases per document, stored as
# "keyPhrases"; "en" is the fallback language code.
keyphrase_skill = KeyPhraseExtractionSkill(
    name="keyphrase-extraction",
    description="Extract key phrases",
    context="/document",
    max_key_phrase_count=10,
    default_language_code="en",
    inputs=[
        InputFieldMappingEntry(source="/document/content", name="text"),
        InputFieldMappingEntry(source="/document/language", name="languageCode"),
    ],
    outputs=[OutputFieldMappingEntry(target_name="keyPhrases", name="keyPhrases")],
)

# Sentiment Analysis
# The "sentiment" and "confidenceScores" outputs exist only on the V3
# sentiment skill. The SDK uses V1 (whose only output is "score") unless
# skill_version is given, so the version is pinned explicitly.
sentiment_skill = SentimentSkill(
    name="sentiment-analysis",
    description="Analyze sentiment",
    context="/document",
    skill_version=SentimentSkillVersion.V3,
    default_language_code="en",
    inputs=[
        InputFieldMappingEntry(name="text", source="/document/content"),
        InputFieldMappingEntry(name="languageCode", source="/document/language"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="sentiment", target_name="sentiment"),
        OutputFieldMappingEntry(name="confidenceScores", target_name="sentimentScores"),
    ],
)

# PII Detection
# masking_mode="replace" is required for the "maskedText" output: with the
# default mode ("none") the skill does not return masked text, so
# "redactedContent" would never be populated.
pii_skill = PIIDetectionSkill(
    name="pii-detection",
    description="Detect and redact PII",
    context="/document",
    default_language_code="en",
    masking_mode="replace",
    # Category names follow the Azure AI Language PII category list, where
    # US social security numbers are "USSocialSecurityNumber" (not "SSN").
    pii_categories=[
        "Email",
        "PhoneNumber",
        "CreditCardNumber",
        "USSocialSecurityNumber",
    ],
    inputs=[
        InputFieldMappingEntry(name="text", source="/document/content"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="piiEntities", target_name="piiEntities"),
        OutputFieldMappingEntry(name="maskedText", target_name="redactedContent"),
    ],
)

Image Processing Skills

# OCR Skill: runs once per normalized image and stores the recognized text
# ("extractedText") and its layout ("layoutText") next to that image.
ocr_skill = OcrSkill(
    name="ocr-skill",
    description="Extract text from images",
    context="/document/normalized_images/*",
    default_language_code="en",
    # The SDK keyword is should_detect_orientation; "detect_orientation" is
    # not a model attribute, so orientation detection was never switched on.
    should_detect_orientation=True,
    inputs=[
        InputFieldMappingEntry(name="image", source="/document/normalized_images/*"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="text", target_name="extractedText"),
        OutputFieldMappingEntry(name="layoutText", target_name="layoutText"),
    ],
)

# Image Analysis Skill: runs once per normalized image.
# Every requested visual feature is mapped to an output; "faces" and
# "objects" were previously requested but never mapped, so their results
# were discarded. Feature names use the documented lowercase values.
image_analysis_skill = ImageAnalysisSkill(
    name="image-analysis",
    description="Analyze image content",
    context="/document/normalized_images/*",
    visual_features=["tags", "description", "categories", "faces", "objects"],
    inputs=[
        InputFieldMappingEntry(name="image", source="/document/normalized_images/*"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="tags", target_name="imageTags"),
        OutputFieldMappingEntry(name="description", target_name="imageDescription"),
        OutputFieldMappingEntry(name="categories", target_name="imageCategories"),
        OutputFieldMappingEntry(name="faces", target_name="imageFaces"),
        OutputFieldMappingEntry(name="objects", target_name="imageObjects"),
    ],
)

# Merge skill: combines the document text with the per-image
# "extractedText" values, using each image's content offset, and stores the
# result as "mergedContent".
merge_skill = MergeSkill(
    name="merge-content",
    description="Merge text content with OCR results",
    context="/document",
    inputs=[
        InputFieldMappingEntry(source="/document/content", name="text"),
        InputFieldMappingEntry(
            source="/document/normalized_images/*/extractedText",
            name="itemsToInsert",
        ),
        InputFieldMappingEntry(
            source="/document/normalized_images/*/contentOffset",
            name="offsets",
        ),
    ],
    outputs=[OutputFieldMappingEntry(target_name="mergedContent", name="mergedText")],
)

Text Processing Skills

# Split Skill: breaks long document text into pages of at most 5000
# characters and stores them as the "pages" collection.
split_skill = SplitSkill(
    name="split-text",
    description="Split long documents into pages",
    context="/document",
    maximum_page_length=5000,
    text_split_mode="pages",
    inputs=[InputFieldMappingEntry(source="/document/content", name="text")],
    outputs=[OutputFieldMappingEntry(target_name="pages", name="textItems")],
)

# Per-page entity recognition: the "/document/pages/*" context makes the
# skill run once for every page rather than once per document.
page_entity_skill = EntityRecognitionSkill(
    name="page-entity-recognition",
    context="/document/pages/*",
    categories=["Person", "Organization"],
    inputs=[InputFieldMappingEntry(source="/document/pages/*", name="text")],
    outputs=[
        OutputFieldMappingEntry(name=field, target_name=field)
        for field in ("persons", "organizations")
    ],
)

# Translation Skill: translates the document text to English and stores it
# as "translatedContent".
# No source language is configured: "auto" is not a language code, and the
# skill detects the source language automatically when
# default_from_language_code is left unset.
translation_skill = TextTranslationSkill(
    name="translate-to-english",
    description="Translate content to English",
    context="/document",
    default_to_language_code="en",
    inputs=[
        InputFieldMappingEntry(name="text", source="/document/content"),
    ],
    outputs=[
        OutputFieldMappingEntry(name="translatedText", target_name="translatedContent"),
    ],
)

Creating the Complete Skillset

# Combine all skills into a skillset

# Placeholder key for the Cognitive Services resource attached to the
# skillset. This name was previously used without ever being defined.
cognitive_services_key = "your-cognitive-services-key"

skillset = SearchIndexerSkillset(
    name="comprehensive-skillset",
    description="Full AI enrichment pipeline",
    # Every skill defined above is included; the split, per-page entity and
    # translation skills were previously defined but left out of the list.
    skills=[
        language_skill,
        entity_skill,
        keyphrase_skill,
        sentiment_skill,
        pii_skill,
        ocr_skill,
        image_analysis_skill,
        merge_skill,
        split_skill,
        page_entity_skill,
        translation_skill,
    ],
    # SDK model for key-based attachment, instead of a hand-written OData dict.
    cognitive_services_account=CognitiveServicesAccountKey(key=cognitive_services_key),
)

# Create or update skillset
indexer_client.create_or_update_skillset(skillset)
print(f"Skillset '{skillset.name}' created")

Skillset Best Practices

class SkillsetBuilder:
    """Fluent helper that collects skills and builds a skillset.

    Each ``add_*`` method appends skills to ``self.skills`` and returns the
    builder so calls can be chained; ``build()`` wraps the collected skills
    in a ``SearchIndexerSkillset``.
    """

    def __init__(self, name: str, cognitive_key: str):
        """Store the skillset name and the Cognitive Services key.

        Args:
            name: Name given to the skillset produced by ``build()``.
            cognitive_key: Key used for the skillset's
                ``cognitive_services_account``.
        """
        self.name = name
        self.cognitive_key = cognitive_key
        self.skills = []

    @staticmethod
    def _text_inputs(include_language: bool = True) -> list:
        """Return input mappings for the document text, optionally with the language code."""
        mappings = [InputFieldMappingEntry(name="text", source="/document/content")]
        if include_language:
            mappings.append(
                InputFieldMappingEntry(name="languageCode", source="/document/language")
            )
        return mappings

    @staticmethod
    def _image_inputs() -> list:
        """Return the input mapping that feeds each normalized image to a skill."""
        return [
            InputFieldMappingEntry(name="image", source="/document/normalized_images/*")
        ]

    def add_language_detection(self) -> "SkillsetBuilder":
        """Add language detection, writing the code to ``/document/language``.

        The text enrichment skills read that path, so this is normally the
        first skill added.
        """
        self.skills.append(LanguageDetectionSkill(
            name="detect-language",
            context="/document",
            inputs=self._text_inputs(include_language=False),
            outputs=[OutputFieldMappingEntry(name="languageCode", target_name="language")],
        ))
        return self

    def add_text_enrichment(self, include_pii: bool = False) -> "SkillsetBuilder":
        """Add entity, key phrase and sentiment skills over the document text.

        Args:
            include_pii: When true, also add a PII detection skill that emits
                the detected entities and a masked copy of the text.
        """
        # Entity recognition
        self.skills.append(EntityRecognitionSkill(
            name="extract-entities",
            context="/document",
            inputs=self._text_inputs(),
            outputs=[
                OutputFieldMappingEntry(name="persons", target_name="persons"),
                OutputFieldMappingEntry(name="organizations", target_name="organizations"),
                OutputFieldMappingEntry(name="locations", target_name="locations"),
            ],
        ))

        # Key phrases
        self.skills.append(KeyPhraseExtractionSkill(
            name="extract-keyphrases",
            context="/document",
            inputs=self._text_inputs(),
            outputs=[OutputFieldMappingEntry(name="keyPhrases", target_name="keyPhrases")],
        ))

        # Sentiment: the "sentiment" output exists only on the V3 skill, and
        # the SDK uses V1 (output "score") unless skill_version is given.
        self.skills.append(SentimentSkill(
            name="analyze-sentiment",
            context="/document",
            skill_version=SentimentSkillVersion.V3,
            inputs=self._text_inputs(),
            outputs=[OutputFieldMappingEntry(name="sentiment", target_name="sentiment")],
        ))

        if include_pii:
            # masking_mode="replace" is needed for "maskedText"; with the
            # default mode ("none") no masked text is returned.
            self.skills.append(PIIDetectionSkill(
                name="detect-pii",
                context="/document",
                masking_mode="replace",
                inputs=self._text_inputs(include_language=False),
                outputs=[
                    OutputFieldMappingEntry(name="piiEntities", target_name="piiEntities"),
                    OutputFieldMappingEntry(name="maskedText", target_name="redactedContent"),
                ],
            ))

        return self

    def add_image_processing(self) -> "SkillsetBuilder":
        """Add OCR and image analysis skills, each run once per normalized image."""
        self.skills.append(OcrSkill(
            name="extract-image-text",
            context="/document/normalized_images/*",
            inputs=self._image_inputs(),
            outputs=[OutputFieldMappingEntry(name="text", target_name="extractedText")],
        ))

        self.skills.append(ImageAnalysisSkill(
            name="analyze-images",
            context="/document/normalized_images/*",
            visual_features=["Tags", "Description"],
            inputs=self._image_inputs(),
            outputs=[
                OutputFieldMappingEntry(name="tags", target_name="imageTags"),
                OutputFieldMappingEntry(name="description", target_name="imageDescription"),
            ],
        ))

        return self

    def build(self) -> "SearchIndexerSkillset":
        """Return a ``SearchIndexerSkillset`` holding the skills added so far."""
        return SearchIndexerSkillset(
            name=self.name,
            # Copy the list so skills added to the builder afterwards do not
            # change a skillset that was already built.
            skills=list(self.skills),
            # SDK model for key-based attachment, instead of a raw OData dict.
            cognitive_services_account=CognitiveServicesAccountKey(key=self.cognitive_key),
        )

# Usage
# Placeholder key for the Cognitive Services resource. This name was
# previously used without ever being defined.
cognitive_key = "your-cognitive-services-key"

skillset = (
    SkillsetBuilder("my-skillset", cognitive_key)
    .add_language_detection()
    .add_text_enrichment(include_pii=True)
    .add_image_processing()
    .build()
)

indexer_client.create_or_update_skillset(skillset)

Skillsets enable powerful AI-driven content enrichment during the indexing process.