5 min read
Azure Cognitive Search Vector Search: Building Semantic Search Applications
Azure Cognitive Search has introduced vector search capabilities, enabling semantic search applications that understand meaning, not just keywords. Today, I will show you how to build vector search solutions.
What is Vector Search?
Vector search uses embeddings (dense vector representations) to find semantically similar content:
┌─────────────────────────────────────────────────────┐
│ Vector Search Pipeline │
├─────────────────────────────────────────────────────┤
│ │
│ Document ──▶ Embedding Model ──▶ Vector [0.1,...] │
│ │
│ Query ──▶ Embedding Model ──▶ Query Vector │
│ │
│ Query Vector ──▶ Similarity Search ──▶ Results │
│ (cosine, dot product) │
│ │
└─────────────────────────────────────────────────────┘
Setting Up Vector Search Index
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SemanticConfiguration,
    SemanticField,
    SemanticPrioritizedFields,
    SemanticSearch
)
from azure.core.credentials import AzureKeyCredential

# Initialize client
# Admin-level client: required for creating/updating indexes (query keys are
# not sufficient for index management).
# NOTE(review): endpoint and key are placeholders; in production load them
# from configuration/Key Vault rather than hard-coding.
index_client = SearchIndexClient(
    endpoint="https://your-search.search.windows.net",
    credential=AzureKeyCredential("your-admin-key")
)
# Define index with vector field
fields = [
    # Document key: every index needs exactly one string field with key=True.
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        filterable=True
    ),
    # Full-text searchable fields (analyzed for keyword/BM25 search).
    SearchableField(
        name="title",
        type=SearchFieldDataType.String,
        searchable=True
    ),
    SearchableField(
        name="content",
        type=SearchFieldDataType.String,
        searchable=True
    ),
    # Metadata field used for filtering and faceted navigation.
    SimpleField(
        name="category",
        type=SearchFieldDataType.String,
        filterable=True,
        facetable=True
    ),
    # Vector field: collection of single-precision floats whose length must
    # match the embedding model's output dimensionality.
    SearchField(
        name="content_vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1536,  # OpenAI ada-002 dimensions
        vector_search_profile_name="my-vector-profile"
    )
]
# Configure vector search
# HNSW = approximate nearest-neighbor graph; the parameters trade index
# build cost and query latency for recall.
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name="my-hnsw-config",
            parameters={
                "m": 4,                 # links per graph node (connectivity)
                "efConstruction": 400,  # candidate-list size while building
                "efSearch": 500,        # candidate-list size at query time
                "metric": "cosine"      # similarity metric for the vectors
            }
        )
    ],
    profiles=[
        # The profile name is what vector fields reference via
        # vector_search_profile_name; it binds a field to an algorithm config.
        VectorSearchProfile(
            name="my-vector-profile",
            algorithm_configuration_name="my-hnsw-config"
        )
    ]
)
# Configure semantic search (re-ranking): tells the service which fields to
# feed to the semantic ranker.
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        content_fields=[SemanticField(field_name="content")]
    )
)
semantic_search = SemanticSearch(configurations=[semantic_config])

# Create (or update in place) the index with vector + semantic configuration.
# create_or_update_index is idempotent, so re-running this script is safe.
index = SearchIndex(
    name="documents-vector",
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search
)
index_client.create_or_update_index(index)
# Fix: the original was an f-string with no placeholder (hard-coded name);
# report the actual index name so the message can't drift from the config.
print(f"Index '{index.name}' created")
Generating Embeddings
from openai import AzureOpenAI

# Azure OpenAI client, used below for embedding generation and (in VectorRAG)
# chat completions.
# NOTE(review): key/endpoint are placeholders; load real values from config.
openai_client = AzureOpenAI(
    api_key="your-openai-key",
    api_version="2023-05-15",
    azure_endpoint="https://your-openai.openai.azure.com"
)
def get_embedding(text: str, model: str = "text-embedding-ada-002") -> list[float]:
    """Return the embedding vector for a single piece of text.

    Calls the Azure OpenAI embeddings endpoint with the given deployment
    name and returns the raw vector (1536 floats for ada-002).
    """
    result = openai_client.embeddings.create(model=model, input=text)
    return result.data[0].embedding
def get_embeddings_batch(texts: list[str], model: str = "text-embedding-ada-002") -> list[list[float]]:
    """Return one embedding vector per input text, in the same order.

    A single batched API call is cheaper and faster than calling
    get_embedding once per text.
    """
    result = openai_client.embeddings.create(model=model, input=texts)
    vectors = []
    for item in result.data:
        vectors.append(item.embedding)
    return vectors
Indexing Documents
from azure.search.documents import SearchClient

# Data-plane client bound to the index created above; used for uploading
# documents and running queries.
search_client = SearchClient(
    endpoint="https://your-search.search.windows.net",
    index_name="documents-vector",
    credential=AzureKeyCredential("your-admin-key")
)
def index_documents(documents: list[dict]):
    """Embed each document's content and upload the batch to the index.

    Mutates each dict in *documents* by adding a ``content_vector`` key,
    then uploads the whole batch in one request.
    """
    # Embed all contents in a single batched API call rather than one
    # request per document.
    contents = [doc["content"] for doc in documents]
    embeddings = get_embeddings_batch(contents)
    for doc, embedding in zip(documents, embeddings):
        doc["content_vector"] = embedding
    result = search_client.upload_documents(documents)
    # Fix: upload_documents returns one IndexingResult per document and
    # individual documents can fail; len(result) counts *attempted* uploads,
    # which over-reports on partial failure. Count only accepted documents.
    succeeded = sum(1 for r in result if r.succeeded)
    print(f"Indexed {succeeded}/{len(documents)} documents")
# Example documents
# Each dict must match the index schema; content_vector is filled in by
# index_documents before upload.
documents = [
    {
        "id": "1",
        "title": "Introduction to Machine Learning",
        "content": "Machine learning is a subset of artificial intelligence...",
        "category": "AI"
    },
    {
        "id": "2",
        "title": "Deep Learning Fundamentals",
        "content": "Deep learning uses neural networks with many layers...",
        "category": "AI"
    },
    {
        "id": "3",
        "title": "Natural Language Processing",
        "content": "NLP enables computers to understand human language...",
        "category": "NLP"
    }
]
index_documents(documents)
Vector Search Queries
Pure Vector Search
from azure.search.documents.models import VectorizedQuery
def vector_search(query: str, top_k: int = 5) -> list[dict]:
    """Run a pure k-NN vector query (no keyword matching) against the index."""
    embedding = get_embedding(query)
    knn = VectorizedQuery(
        vector=embedding,
        k_nearest_neighbors=top_k,
        fields="content_vector"
    )
    hits = search_client.search(
        search_text=None,  # vector-only: skip the BM25 text leg entirely
        vector_queries=[knn],
        select=["id", "title", "content", "category"]
    )
    output = []
    for hit in hits:
        output.append({
            "id": hit["id"],
            "title": hit["title"],
            "content": hit["content"],
            "score": hit["@search.score"]
        })
    return output
# Usage
results = vector_search("How do neural networks learn?")
for result in results:
print(f"{result['title']} (score: {result['score']:.4f})")
Hybrid Search (Vector + Text)
def hybrid_search(query: str, top_k: int = 5) -> list[dict]:
    """Run keyword and vector search together over the same index.

    The service fuses the two result sets into a single ranked list.
    """
    knn = VectorizedQuery(
        vector=get_embedding(query),
        k_nearest_neighbors=top_k,
        fields="content_vector"
    )
    hits = search_client.search(
        search_text=query,  # Also perform text search
        vector_queries=[knn],
        select=["id", "title", "content", "category"],
        top=top_k
    )
    return list(hits)
Filtered Vector Search
def filtered_vector_search(
    query: str,
    category: str | None = None,
    top_k: int = 5
) -> list[dict]:
    """Vector search optionally restricted to a single category.

    Args:
        query: Natural-language query to embed and search with.
        category: If given, only documents whose ``category`` field equals
            this value are considered.
        top_k: Number of nearest neighbors to retrieve.
    """
    vector_query = VectorizedQuery(
        vector=get_embedding(query),
        k_nearest_neighbors=top_k,
        fields="content_vector"
    )
    filter_expression = None
    if category:
        # Fix: escape single quotes per OData string-literal rules so a value
        # such as "O'Brien" (or attacker-supplied input) cannot break out of
        # the quoted literal and alter the filter expression.
        safe_category = category.replace("'", "''")
        filter_expression = f"category eq '{safe_category}'"
    results = search_client.search(
        search_text=None,
        vector_queries=[vector_query],
        filter=filter_expression,
        select=["id", "title", "content", "category"]
    )
    return list(results)
# Search only in AI category
results = filtered_vector_search("neural networks", category="AI")
Semantic Ranking
def semantic_hybrid_search(query: str, top_k: int = 5) -> list[dict]:
    """Hybrid (text + vector) search re-ranked by the semantic ranker.

    Returns dicts carrying both the base relevance score and the semantic
    reranker score, plus an extractive caption when one is produced.
    """
    knn = VectorizedQuery(
        vector=get_embedding(query),
        k_nearest_neighbors=top_k,
        fields="content_vector"
    )
    hits = search_client.search(
        search_text=query,
        vector_queries=[knn],
        query_type="semantic",
        semantic_configuration_name="my-semantic-config",
        query_caption="extractive",
        query_answer="extractive",
        top=top_k
    )
    ranked = []
    for hit in hits:
        entry = {
            "id": hit["id"],
            "title": hit["title"],
            "content": hit["content"],
            "score": hit["@search.score"],
            "reranker_score": hit.get("@search.reranker_score")
        }
        # Attach the first extractive caption when present and non-empty.
        captions = hit.get("@search.captions")
        if captions:
            entry["caption"] = captions[0].text
        ranked.append(entry)
    return ranked
RAG Pattern Implementation
class VectorRAG:
    """Retrieval-augmented generation over Azure Cognitive Search.

    Retrieval runs a hybrid (keyword + vector) query with semantic
    re-ranking; generation grounds an Azure OpenAI chat model in the
    retrieved documents only.
    """

    def __init__(self, search_client, openai_client, model: str = "gpt-4"):
        """Store service clients and the chat deployment name.

        Args:
            search_client: SearchClient bound to the vector index.
            openai_client: AzureOpenAI client used for chat completions.
            model: Chat deployment name. Generalizes the previously
                hard-coded "gpt-4"; the default preserves old behavior.
        """
        self.search = search_client
        self.openai = openai_client
        self.model = model

    def retrieve(self, query: str, top_k: int = 5) -> list[dict]:
        """Retrieve the top_k most relevant documents for *query*."""
        query_embedding = get_embedding(query)
        vector_query = VectorizedQuery(
            vector=query_embedding,
            k_nearest_neighbors=top_k,
            fields="content_vector"
        )
        results = self.search.search(
            search_text=query,
            vector_queries=[vector_query],
            query_type="semantic",
            semantic_configuration_name="my-semantic-config",
            top=top_k
        )
        return list(results)

    def generate(self, query: str, context: list[dict]) -> str:
        """Generate an answer to *query* grounded in *context* documents."""
        context_text = "\n\n".join(
            f"Title: {doc['title']}\nContent: {doc['content']}"
            for doc in context
        )
        messages = [
            {
                "role": "system",
                "content": """You are a helpful assistant that answers questions based on the provided context.
Only use information from the context. If the answer is not in the context, say so."""
            },
            {
                "role": "user",
                "content": f"""Context:
{context_text}
Question: {query}
Answer:"""
            }
        ]
        response = self.openai.chat.completions.create(
            model=self.model,  # was hard-coded "gpt-4"; now configurable
            messages=messages,
            temperature=0.7,
            max_tokens=500
        )
        return response.choices[0].message.content

    def query(self, question: str) -> dict:
        """End-to-end RAG: retrieve context, then generate an answer.

        Returns a dict with the question, the generated answer, and the
        id/title of each source document used as context.
        """
        # Retrieve
        documents = self.retrieve(question)
        # Generate
        answer = self.generate(question, documents)
        return {
            "question": question,
            "answer": answer,
            "sources": [
                {"title": doc["title"], "id": doc["id"]}
                for doc in documents
            ]
        }
# Usage
# End-to-end example: retrieve relevant documents, then generate a grounded
# answer with cited sources.
rag = VectorRAG(search_client, openai_client)
result = rag.query("How do neural networks learn?")
print(f"Answer: {result['answer']}")
print(f"Sources: {result['sources']}")
Vector search enables powerful semantic search capabilities. Tomorrow, I will cover hybrid retrieval patterns in more detail.