3 min read
Semantic Search in Azure Cognitive Search
Semantic search uses AI to understand the intent behind queries, providing more relevant results than traditional keyword matching. Azure Cognitive Search now offers built-in semantic search capabilities.
Enabling Semantic Search
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
SearchIndex,
SemanticConfiguration,
SemanticField,
SemanticPrioritizedFields,
SemanticSettings
)
from azure.core.credentials import AzureKeyCredential
# Connection details for the search service. Both values are placeholders;
# substitute your own endpoint and key.
endpoint = "https://mysearchservice.search.windows.net"
credential = AzureKeyCredential("your-admin-key")
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

# Semantic configuration: one title field, two content fields, and two
# keyword fields, each wrapped in a SemanticField.
prioritized_fields = SemanticPrioritizedFields(
    title_field=SemanticField(field_name="title"),
    content_fields=[
        SemanticField(field_name=field) for field in ("content", "summary")
    ],
    keywords_fields=[
        SemanticField(field_name=field) for field in ("category", "tags")
    ],
)
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=prioritized_fields,
)
# Add semantic settings to index
# The field helpers are imported next to their use so this snippet brings
# everything it needs for the schema into scope.
from azure.search.documents.indexes.models import (
    SearchableField,
    SearchFieldDataType,
    SimpleField,
)

# Index schema: the key field plus every field named by the semantic
# configurations, filters, and select lists used with "articles-index".
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    SearchableField(name="title"),
    SearchableField(name="content"),
    SearchableField(name="summary"),
    SearchableField(name="content_en"),
    SearchableField(name="content_de"),
    SearchableField(name="content_fr"),
    # Filterable because queries filter on category.
    SearchableField(name="category", filterable=True, facetable=True),
    # collection=True makes this a collection of strings.
    SearchableField(name="tags", collection=True, filterable=True),
    SimpleField(
        name="rating",
        type=SearchFieldDataType.Double,
        filterable=True,
        sortable=True,
    ),
    SimpleField(
        name="lastUpdated",
        type=SearchFieldDataType.DateTimeOffset,
        filterable=True,
        sortable=True,
    ),
]

index = SearchIndex(
    name="articles-index",
    fields=fields,
    semantic_settings=SemanticSettings(
        configurations=[semantic_config],
        default_configuration="my-semantic-config",
    ),
)

# Creates the index, or updates it in place if it already exists.
index_client.create_or_update_index(index)
Semantic Queries
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType, QueryLanguage
# Query client bound to "articles-index", reusing the endpoint and credential.
search_client = SearchClient(
    endpoint=endpoint,
    index_name="articles-index",
    credential=credential,
)

# Semantic search with natural language query
results = search_client.search(
    search_text="How do I deploy machine learning models to production?",
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="my-semantic-config",
    query_language=QueryLanguage.EN_US,
    top=10,
)

# Print the title, the search score, and the reranker score of every hit.
for hit in results:
    print(f"\nTitle: {hit['title']}")
    print(f"Score: {hit['@search.score']:.4f}")
    reranker = hit.get('@search.reranker_score', 'N/A')
    print(f"Reranker Score: {reranker}")
Semantic Captions and Answers
from azure.search.documents.models import QueryCaptionType, QueryAnswerType
# Search with semantic captions and answers
results = search_client.search(
    search_text="What are the benefits of using Azure ML for model deployment?",
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="my-semantic-config",
    query_language=QueryLanguage.EN_US,
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=5,
)

# Get semantic answers (direct answers to the query).
# The truthiness check skips the loop when no answers came back.
answers = results.get_answers()
if answers:
    for answer in answers:
        print("\nSemantic Answer:")
        print(f" Text: {answer.text}")
        print(f" Highlights: {answer.highlights}")
        print(f" Score: {answer.score:.4f}")

# Get results with captions
for result in results:
    print(f"\nTitle: {result['title']}")
    # Get captions (relevant excerpts). `or []` covers both a missing key and
    # a key whose value is None; a `.get(..., [])` default only covers the
    # former and would let None reach the loop below.
    captions = result.get("@search.captions") or []
    for caption in captions:
        print(f" Caption: {caption.text}")
        if caption.highlights:
            print(f" Highlights: {caption.highlights}")
Comparing Keyword vs Semantic Search
def compare_search_modes(query):
    """Run *query* as a keyword search and as a semantic search; print both.

    Each mode requests five results. Keyword hits are listed with their
    search score, semantic hits with their reranker score ('N/A' when the
    result carries no such key).
    """
    keyword_hits = search_client.search(
        search_text=query, query_type=QueryType.SIMPLE, top=5
    )
    semantic_hits = search_client.search(
        search_text=query,
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name="my-semantic-config",
        query_language=QueryLanguage.EN_US,
        top=5,
    )

    print(f"Query: {query}")

    print("\n=== Keyword Search Results ===")
    for rank, hit in enumerate(keyword_hits, start=1):
        print(f"{rank}. {hit['title']} (Score: {hit['@search.score']:.4f})")

    print("\n=== Semantic Search Results ===")
    for rank, hit in enumerate(semantic_hits, start=1):
        title = hit['title']
        reranker = hit.get('@search.reranker_score', 'N/A')
        print(f"{rank}. {title} (Reranker Score: {reranker})")
# Run the comparison for a few sample queries, in this order.
for sample_query in (
    "How to build ML pipelines",
    "best practices for data science projects",
    "deploy AI models",
):
    compare_search_modes(sample_query)
Semantic Search with Filters
# Combine semantic search with filters.
# No order_by here: semantic results are ordered by the reranker score, and
# the service rejects a semantic request that also carries an explicit sort
# (HTTP 400 per the Azure semantic query documentation).
results = search_client.search(
    search_text="machine learning deployment strategies",
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="my-semantic-config",
    query_language=QueryLanguage.EN_US,
    filter="category eq 'Technology' and rating gt 4",
    top=10,
    query_caption=QueryCaptionType.EXTRACTIVE,
)

for result in results:
    print(f"{result['title']} - {result['category']}")
    # Show only the first caption. The truthiness check covers a missing
    # key, a None value, and an empty list alike.
    captions = result.get("@search.captions", [])
    if captions:
        print(f" {captions[0].text}")
Multilingual Semantic Search
# Index with multiple languages: a shared title field plus one content field
# per language suffix (en, de, fr).
multilingual_content_fields = [
    SemanticField(field_name=field)
    for field in ("content_en", "content_de", "content_fr")
]
multilingual_semantic_config = SemanticConfiguration(
    name="multilingual-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        content_fields=multilingual_content_fields,
    ),
)
# NOTE(review): nothing in this snippet registers the configuration on an
# index — confirm it is added to the index's semantic settings before it is
# referenced by name in a query.
# Search in different languages
def search_multilingual(query, language):
    """Run a semantic query with "multilingual-config" in the given language.

    *language* is a short code: "en", "de", or "fr". Any other code falls
    back to QueryLanguage.EN_US. Returns up to five results as a list.
    """
    query_languages = {
        "en": QueryLanguage.EN_US,
        "de": QueryLanguage.DE_DE,
        "fr": QueryLanguage.FR_FR,
    }
    try:
        query_language = query_languages[language]
    except KeyError:
        # Unknown code: default to US English.
        query_language = QueryLanguage.EN_US

    hits = search_client.search(
        search_text=query,
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name="multilingual-config",
        query_language=query_language,
        top=5,
    )
    # Materialize the iterator so callers receive a plain list.
    return list(hits)
# English query
en_results = search_multilingual(
    query="machine learning deployment",
    language="en",
)
# German query
de_results = search_multilingual(
    query="maschinelles Lernen Bereitstellung",
    language="de",
)
# French query
fr_results = search_multilingual(
    query="deploiement apprentissage automatique",
    language="fr",
)
Performance Considerations
# Optimize semantic search performance
results = search_client.search(
    search_text="How to optimize ML models",
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="my-semantic-config",
    query_language=QueryLanguage.EN_US,
    # Restrict the keyword-matching stage to these fields.
    search_fields=["title", "content"],
    # Return only these fields to keep the response payload small.
    select=["id", "title", "summary"],
    # Number of results returned to the caller. This caps the response, not
    # the ranker's input: per the Azure semantic ranking documentation the
    # ranker works on the top matches of the initial query (up to 50).
    # NOTE(review): confirm 50 is intended — a smaller value shrinks the payload.
    top=50,
    # Captions and answers are left off; enable them only when the
    # application displays them.
    query_caption=None,
    query_answer=None,
)
Semantic search dramatically improves search relevance by understanding user intent rather than just matching keywords.