Back to Blog
1 min read

Vector Search Optimization: Scaling to Billions of Vectors

As vector databases grow, optimization becomes critical. Here’s how to scale vector search effectively.

Vector Search at Scale

import inspect

from azure.search.documents import SearchClient
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    ScalarQuantizationCompressionConfiguration,
    ScalarQuantizationParameters,
    SearchableField,
    SearchField,
    SearchIndex,
    VectorSearch,
    VectorSearchProfile,
)
from azure.search.documents.models import VectorizedQuery

class OptimizedVectorSearch:
    """Index-definition, bulk-upload and filtered-query helpers for vector search.

    The wrapped client may be the synchronous ``SearchClient`` or the one from
    ``azure.search.documents.aio``: the coroutine methods await a client call
    only when that call actually returns an awaitable.
    """

    # Annotations are quoted so they are never evaluated at definition time.
    def __init__(self, search_client: "SearchClient"):
        """Keep a reference to the client used by the upload and query methods.

        Args:
            search_client: Client exposing ``upload_documents`` and ``search``.
        """
        self.client = search_client

    def create_optimized_index(self) -> "SearchIndex":
        """Build the ``optimized-vectors`` index definition.

        Only the ``SearchIndex`` object is constructed here; nothing is sent
        to the service, so the caller still has to submit the definition.

        Returns:
            A ``SearchIndex`` with a string key ``id``, a searchable
            ``content`` field and a 1536-dimension ``embedding`` vector field
            bound to an HNSW profile with int8 scalar quantization.
        """
        return SearchIndex(
            name="optimized-vectors",
            fields=[
                SearchableField(name="id", type="Edm.String", key=True),
                SearchableField(name="content", type="Edm.String"),
                SearchField(
                    name="embedding",
                    type="Collection(Edm.Single)",
                    # Vector fields are declared searchable in the SDK samples.
                    searchable=True,
                    vector_search_dimensions=1536,
                    vector_search_profile_name="optimized-profile",
                ),
            ],
            vector_search=VectorSearch(
                algorithms=[
                    HnswAlgorithmConfiguration(
                        name="hnsw-config",
                        # Typed model instead of a raw dict with REST-style
                        # camelCase keys, which the SDK models do not accept.
                        parameters=HnswParameters(
                            m=4,  # Connections per node (lower = faster, less accurate)
                            ef_construction=400,  # Build quality
                            ef_search=500,  # Search quality
                            metric="cosine",
                        ),
                    )
                ],
                compressions=[
                    ScalarQuantizationCompressionConfiguration(
                        name="scalar-quantization",
                        parameters=ScalarQuantizationParameters(
                            quantized_data_type="int8",  # 4x memory reduction
                        ),
                    )
                ],
                profiles=[
                    VectorSearchProfile(
                        name="optimized-profile",
                        algorithm_configuration_name="hnsw-config",
                        compression_configuration_name="scalar-quantization",
                    )
                ],
            ),
        )

    async def batch_index(self, documents: list, batch_size: int = 1000):
        """Upload ``documents`` in consecutive slices of ``batch_size``.

        Args:
            documents: Documents to upload, in order.
            batch_size: Maximum number of documents per upload call.

        Returns:
            A list with the items returned by every upload call, in call
            order (empty when ``documents`` is empty).

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A negative step would make the range empty and silently skip every
        # document; zero already raises ValueError inside range().
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        results = []
        for start in range(0, len(documents), batch_size):
            batch = documents[start:start + batch_size]
            outcome = self.client.upload_documents(batch)
            # The sync client returns the results directly; the aio client
            # returns a coroutine that has to be awaited.
            if inspect.isawaitable(outcome):
                outcome = await outcome
            if outcome:
                results.extend(outcome)
        return results

    async def filtered_vector_search(self, query_vector: list, filters: str, top_k: int = 10):
        """Run a filtered nearest-neighbour query against the ``embedding`` field.

        Args:
            query_vector: Query embedding.
            filters: Filter expression passed to the client as ``filter``.
            top_k: Number of nearest neighbours requested for the vector query.

        Returns:
            Whatever the client's ``search`` call yields (awaited first when
            it is awaitable); only ``id`` and ``content`` are selected.
        """
        vector_query = VectorizedQuery(
            vector=query_vector,
            k_nearest_neighbors=top_k,
            fields="embedding",
        )
        response = self.client.search(
            search_text=None,  # pure vector query, no full-text component
            vector_queries=[vector_query],
            filter=filters,
            select=["id", "content"],
        )
        # Same sync/async duality as in batch_index.
        if inspect.isawaitable(response):
            response = await response
        return response

Quantization and HNSW tuning enable vector search at billion-scale with reasonable latency.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.