Skip to content
Back to Blog
1 min read

Azure AI Search: Implementing Hybrid Search with Vectors and Keywords

This post shares practical, production-minded guidance on implementing hybrid search (keyword plus vector) in Azure AI Search.

Setting Up Hybrid Search Index

Create an index that supports both keyword and vector search:

from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex, SearchField, SearchFieldDataType,
    VectorSearch, HnswAlgorithmConfiguration, VectorSearchProfile,
    SemanticConfiguration, SemanticField, SemanticPrioritizedFields, SemanticSearch
)
from azure.identity import DefaultAzureCredential

def create_hybrid_search_index(endpoint: str, index_name: str, vector_dimensions: int = 1536) -> SearchIndex:
    """Create or update an index that supports keyword, vector, and semantic queries.

    Args:
        endpoint: URL of the Azure AI Search service.
        index_name: Name of the index to create or update.
        vector_dimensions: Length of the vectors stored in ``content_vector``.
            It has to match the output size of the embedding model used when
            indexing and querying.

    Returns:
        The index definition returned by ``create_or_update_index``.
    """
    # Function-scope import: the typed HNSW parameter model is not part of the
    # import list at the top of this snippet.
    from azure.search.documents.indexes.models import HnswParameters

    client = SearchIndexClient(endpoint, DefaultAzureCredential())

    fields = [
        SearchField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(
            name="content",
            type=SearchFieldDataType.String,
            searchable=True,
            analyzer_name="en.microsoft",
        ),
        SearchField(name="title", type=SearchFieldDataType.String, searchable=True),
        SearchField(
            name="content_vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=vector_dimensions,
            vector_search_profile_name="vector-profile",
        ),
        SearchField(name="category", type=SearchFieldDataType.String, filterable=True),
    ]

    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="hnsw-config",
                # Use the typed parameter model rather than a raw camelCase
                # dict so the SDK serializes the settings itself.
                parameters=HnswParameters(
                    m=4,
                    ef_construction=400,
                    ef_search=500,
                    metric="cosine",
                ),
            )
        ],
        # The profile name is what the "content_vector" field refers to.
        profiles=[
            VectorSearchProfile(
                name="vector-profile",
                algorithm_configuration_name="hnsw-config",
            )
        ],
    )

    # Queries that use query_type="semantic" must name a semantic configuration
    # that exists on the index; without this section such queries are rejected.
    # "semantic-config" is the name the query code in this article passes.
    semantic_search = SemanticSearch(
        configurations=[
            SemanticConfiguration(
                name="semantic-config",
                prioritized_fields=SemanticPrioritizedFields(
                    title_field=SemanticField(field_name="title"),
                    content_fields=[SemanticField(field_name="content")],
                ),
            )
        ]
    )

    index = SearchIndex(
        name=index_name,
        fields=fields,
        vector_search=vector_search,
        semantic_search=semantic_search,
    )
    return client.create_or_update_index(index)

Executing Hybrid Queries

Combine keyword and vector search with semantic reranking:

from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from openai import AzureOpenAI

class HybridSearchService:
    """Run hybrid (keyword + vector) queries with semantic reranking on one index."""

    def __init__(self, search_endpoint: str, index_name: str, openai_client: AzureOpenAI):
        """Bind the service to a search index and an embedding client.

        Args:
            search_endpoint: URL of the Azure AI Search service.
            index_name: Name of the index to query.
            openai_client: Client used to embed query text.
        """
        self.search_client = SearchClient(
            endpoint=search_endpoint,
            index_name=index_name,
            credential=DefaultAzureCredential(),
        )
        self.openai_client = openai_client

    def get_embedding(self, text: str) -> list[float]:
        """Return the embedding vector for ``text``.

        NOTE(review): the model name is passed as-is to the client; with Azure
        OpenAI this is presumably the deployment name - confirm it matches the
        deployment and the index's vector dimensions.
        """
        response = self.openai_client.embeddings.create(model="text-embedding-3-small", input=text)
        return response.data[0].embedding

    def hybrid_search(self, query: str, top_k: int = 10, category_filter: str = None) -> list[dict]:
        """Execute a hybrid search with semantic reranking.

        The query text is sent both as keyword search text and as an embedding
        against ``content_vector``.

        Args:
            query: User query text.
            top_k: Number of results to return. The vector leg retrieves
                ``top_k * 2`` neighbours so it contributes more candidates
                than the final page size.
            category_filter: Optional exact-match value for the ``category``
                field. ``None`` or an empty string disables filtering.

        Returns:
            One dict per hit with ``id``, ``title``, ``content``, ``score``
            (the ``@search.score`` value) and ``reranker_score`` (the
            ``@search.reranker_score`` value, or ``None`` when the service
            did not return one).
        """
        query_vector = self.get_embedding(query)
        vector_query = VectorizedQuery(
            vector=query_vector,
            k_nearest_neighbors=top_k * 2,
            fields="content_vector",
        )

        filter_expr = None
        if category_filter:
            # OData string literals escape a single quote by doubling it.
            # Without this, a value such as "O'Reilly" yields an invalid
            # filter, and a crafted value could append its own clauses.
            escaped_category = category_filter.replace("'", "''")
            filter_expr = f"category eq '{escaped_category}'"

        results = self.search_client.search(
            search_text=query,
            vector_queries=[vector_query],
            filter=filter_expr,
            select=["id", "title", "content", "category"],
            top=top_k,
            query_type="semantic",
            semantic_configuration_name="semantic-config",
        )

        return [
            {
                "id": r["id"],
                "title": r["title"],
                "content": r["content"],
                "score": r["@search.score"],
                # Exposed alongside the fused score so callers can see the
                # semantic reranker's value too.
                "reranker_score": r.get("@search.reranker_score"),
            }
            for r in results
        ]

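Hybrid search significantly improves retrieval quality for RAG applications by leveraging both semantic understanding and exact keyword matching.

Takeaways

Send the query text and its embedding in the same request, and let semantic reranking order the merged results. As a next step, measure retrieval quality on your own queries and tune top_k and the HNSW parameters against those results.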

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.