Integrated Vectorization in Azure AI Search: Simplifying RAG Pipelines
Integrated vectorization eliminates the need for separate embedding pipelines by vectorizing content automatically during indexing and querying. This significantly simplifies RAG architectures.
Before and After
Traditional Approach
# Before: Manual embedding pipeline
from openai import AzureOpenAI
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery

# openai_client (AzureOpenAI) and search_client (SearchClient) are assumed
# to be configured elsewhere

def index_document(doc):
    # Step 1: Generate the embedding manually
    embedding = openai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=doc["content"]
    ).data[0].embedding
    # Step 2: Add the vector to the document
    doc["content_vector"] = embedding
    # Step 3: Upload to the search index
    search_client.upload_documents([doc])

def search_with_vector(query):
    # Step 1: Generate the query embedding
    query_vector = openai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=query
    ).data[0].embedding
    # Step 2: Search with the precomputed vector
    return search_client.search(
        vector_queries=[VectorizedQuery(
            vector=query_vector,
            k_nearest_neighbors=10,
            fields="content_vector"
        )]
    )
Integrated Vectorization
# After: Automatic vectorization
from azure.search.documents.models import VectorizableTextQuery

def index_document(doc):
    # Just upload - vectorization happens automatically
    search_client.upload_documents([doc])

def search_with_vector(query):
    # Just search with text - vectorization happens automatically
    return search_client.search(
        search_text=query,
        vector_queries=[VectorizableTextQuery(
            text=query,
            k_nearest_neighbors=10,
            fields="content_vector"
        )]
    )
Configuration
Index Definition
from azure.search.documents.indexes.models import (
    SearchIndex, SearchField, VectorSearch,
    VectorSearchProfile, HnswAlgorithmConfiguration,
    AzureOpenAIVectorizer, AzureOpenAIParameters
)

index = SearchIndex(
    name="documents",
    fields=[
        SearchField(name="id", type="Edm.String", key=True),
        SearchField(name="title", type="Edm.String", searchable=True),
        SearchField(name="content", type="Edm.String", searchable=True),
        SearchField(
            name="content_vector",
            type="Collection(Edm.Single)",
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_profile_name="vector-profile"
        )
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration(name="hnsw-config")],
        profiles=[
            VectorSearchProfile(
                name="vector-profile",
                algorithm_configuration_name="hnsw-config",
                vectorizer="openai-vectorizer"
            )
        ],
        vectorizers=[
            AzureOpenAIVectorizer(
                name="openai-vectorizer",
                azure_open_ai_parameters=AzureOpenAIParameters(
                    resource_uri="https://your-resource.openai.azure.com",
                    deployment_id="text-embedding-ada-002",
                    model_name="text-embedding-ada-002"
                )
            )
        ]
    )
)
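Once defined, the index is created with a SearchIndexClient. A minimal sketch, assuming the service endpoint and API key live in environment variables (the variable names here are illustrative):

import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient

# SEARCH_ENDPOINT / SEARCH_API_KEY are placeholder names for your own config
index_client = SearchIndexClient(
    endpoint=os.environ["SEARCH_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["SEARCH_API_KEY"])
)
index_client.create_or_update_index(index)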
Skillset for Indexing
from azure.search.documents.indexes.models import (
    SearchIndexerSkillset,
    AzureOpenAIEmbeddingSkill,
    InputFieldMappingEntry,
    OutputFieldMappingEntry
)

skillset = SearchIndexerSkillset(
    name="vectorization-skillset",
    skills=[
        AzureOpenAIEmbeddingSkill(
            name="embedding-skill",
            resource_uri="https://your-resource.openai.azure.com",
            deployment_id="text-embedding-ada-002",
            model_name="text-embedding-ada-002",
            inputs=[
                InputFieldMappingEntry(name="text", source="/document/content")
            ],
            outputs=[
                OutputFieldMappingEntry(name="embedding", target_name="content_vector")
            ]
        )
    ]
)
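A skillset only runs as part of an indexer, so it has to be created and wired to a data source and the target index. A minimal sketch of that wiring, assuming a data source named "documents-datasource" already exists (that name, like the environment variables, is illustrative):

import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexerClient
from azure.search.documents.indexes.models import SearchIndexer

indexer_client = SearchIndexerClient(
    endpoint=os.environ["SEARCH_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["SEARCH_API_KEY"])
)

# "documents-datasource" is assumed to be an existing data source connection
indexer = SearchIndexer(
    name="documents-indexer",
    data_source_name="documents-datasource",
    target_index_name="documents",
    skillset_name="vectorization-skillset"
)
indexer_client.create_or_update_skillset(skillset)
indexer_client.create_or_update_indexer(indexer)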
Query-Time Vectorization
from azure.search.documents.models import VectorizableTextQuery

def search_documents(query: str, top_k: int = 10):
    """Search with automatic query vectorization."""
    # Text query that gets vectorized automatically at query time
    vector_query = VectorizableTextQuery(
        text=query,
        k_nearest_neighbors=top_k,
        fields="content_vector"
    )
    results = search_client.search(
        search_text=query,  # Keyword half of the hybrid search
        vector_queries=[vector_query],
        query_type="semantic",  # Requires a semantic configuration on the index
        semantic_configuration_name="semantic-config",
        top=top_k
    )
    return list(results)
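A quick usage sketch; the query string is made up, and the fields available on each result depend on your index schema:

for result in search_documents("how does integrated vectorization work?"):
    print(result["@search.score"], result["title"])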
Cost Considerations
# Estimate vectorization costs
def estimate_costs(document_count: int, avg_tokens_per_doc: int):
    """Estimate integrated vectorization costs."""
    # Azure OpenAI embedding pricing (approximate)
    cost_per_1k_tokens = 0.0001

    # Indexing: one embedding per document
    indexing_tokens = document_count * avg_tokens_per_doc
    indexing_cost = (indexing_tokens / 1000) * cost_per_1k_tokens

    # Querying: one embedding per query
    queries_per_day = 10000
    avg_query_tokens = 50
    query_cost_per_day = (queries_per_day * avg_query_tokens / 1000) * cost_per_1k_tokens

    return {
        "indexing_cost": indexing_cost,
        "daily_query_cost": query_cost_per_day,
        "monthly_query_cost": query_cost_per_day * 30
    }

# Example
costs = estimate_costs(100000, 500)  # 100K docs, 500 tokens each
print(f"Indexing: ${costs['indexing_cost']:.2f}")
print(f"Monthly queries: ${costs['monthly_query_cost']:.2f}")
Best Practices
- Use managed identity for Azure OpenAI authentication (see the sketch after this list)
- Monitor embedding latency during indexing
- Cache query embeddings for repeated queries
- Set appropriate rate limits to avoid throttling
- Test with representative data before full migration
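On the first point, a minimal keyless-auth sketch, assuming the search service's managed identity has been granted the "Cognitive Services OpenAI User" role on the Azure OpenAI resource; the endpoint value is a placeholder:

from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient

# Client side: DefaultAzureCredential picks up a managed identity, Azure CLI
# login, etc., so no API key is embedded in code
search_client = SearchClient(
    endpoint="https://your-search.search.windows.net",  # placeholder
    index_name="documents",
    credential=DefaultAzureCredential()
)

# Service side: omit the api_key from AzureOpenAIParameters and the search
# service calls Azure OpenAI with its own managed identity instead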
Conclusion
Integrated vectorization transforms RAG development by eliminating manual embedding pipelines. The result is simpler code, fewer moving parts, and more reliable systems. Start using it for new projects and migrate existing ones when ready.