Back to Blog
2 min read

Cosmos DB Vector Search: Building AI-Ready Databases

Azure Cosmos DB now supports native vector search, enabling AI-ready applications with global scale.

import asyncio
import functools
import json

from azure.cosmos import CosmosClient, PartitionKey
from azure.cosmos.exceptions import CosmosResourceNotFoundError

class CosmosVectorStore:
    """Vector store backed by a single Azure Cosmos DB container.

    Documents are stored with an ``embedding`` field and queried with the
    ``VectorDistance`` SQL function. The client built in ``__init__`` is the
    synchronous ``CosmosClient``; the ``async`` methods therefore hand their
    SDK calls to the event loop's default executor instead of blocking the
    loop.
    """

    def __init__(self, connection_string: str, database_name: str, container_name: str):
        """Build database and container clients from a connection string.

        Args:
            connection_string: Cosmos DB account connection string.
            database_name: Name of the database holding the container.
            container_name: Name of the container used as the vector store.
        """
        self.client = CosmosClient.from_connection_string(connection_string)
        self.database = self.client.get_database_client(database_name)
        self.container = self.database.get_container_client(container_name)

    def create_container_with_vector(
        self,
        dimensions: int = 1536,
        index_type: str = "quantizedFlat",
        distance_function: str = "cosine",
        partition_key_path: str = "/category",
    ):
        """Create the container with a vector indexing policy on ``/embedding``.

        Called without arguments this creates exactly the container the
        original hard-coded version did.

        Args:
            dimensions: Length of the vectors stored under ``/embedding``.
            index_type: Vector index type ("quantizedFlat", "flat" or
                "diskANN").
            distance_function: Distance function recorded in the vector
                embedding policy.
            partition_key_path: Partition key path of the new container.

        Raises:
            ValueError: If ``dimensions`` is not a positive integer.
        """
        if dimensions < 1:
            raise ValueError("dimensions must be a positive integer")

        indexing_policy = {
            "indexingMode": "consistent",
            "automatic": True,
            "includedPaths": [{"path": "/*"}],
            # Keep the raw vector out of the regular index; it is covered by
            # the dedicated vector index declared below.
            "excludedPaths": [{"path": "/embedding/*"}],
            "vectorIndexes": [
                {
                    "path": "/embedding",
                    "type": index_type,
                }
            ],
        }

        vector_embedding_policy = {
            "vectorEmbeddings": [
                {
                    "path": "/embedding",
                    "dataType": "float32",
                    "distanceFunction": distance_function,
                    "dimensions": dimensions,
                }
            ]
        }

        self.database.create_container(
            id=self.container.id,
            partition_key=PartitionKey(path=partition_key_path),
            indexing_policy=indexing_policy,
            vector_embedding_policy=vector_embedding_policy,
        )

    @staticmethod
    async def _run_blocking(func, *args, **kwargs):
        """Run a blocking callable in the default executor and await its result."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, functools.partial(func, *args, **kwargs)
        )

    async def _query(self, query: str, parameters: list) -> list:
        """Execute a cross-partition query and return all rows as a list."""

        def fetch() -> list:
            # query_items returns a lazy iterable; drain it inside the worker
            # thread so paging does not run on the event loop.
            return list(self.container.query_items(
                query=query,
                parameters=parameters,
                enable_cross_partition_query=True,
            ))

        return await self._run_blocking(fetch)

    async def upsert_document(self, doc_id: str, content: str, embedding: list, metadata: dict):
        """Upsert a document together with its embedding.

        Metadata keys are copied onto the document first, so the explicit
        ``doc_id``, ``content`` and ``embedding`` arguments always win over a
        metadata key of the same name. ``category`` is taken from the
        metadata and falls back to "default".

        Args:
            doc_id: Value stored as the document ``id``.
            content: Text stored under ``content``.
            embedding: Vector stored under ``embedding``.
            metadata: Extra fields to store on the document; ``None`` is
                treated as an empty mapping.
        """
        extra = dict(metadata or {})
        document = {
            **extra,
            "id": doc_id,
            "content": content,
            "embedding": embedding,
            "category": extra.get("category", "default"),
        }
        await self._run_blocking(self.container.upsert_item, document)

    async def vector_search(
        self,
        query_embedding: list,
        top_k: int = 10,
        filters: str = None,
        min_similarity: float = 0.7,
        filter_parameters: list = None,
    ) -> list:
        """Perform a vector similarity search.

        Args:
            query_embedding: Query vector compared against ``c.embedding``.
            top_k: Maximum number of rows to return.
            filters: Optional SQL predicate ANDed onto the WHERE clause. It
                is inserted into the query text verbatim, so it must come
                from trusted code; pass user-supplied values through
                ``filter_parameters`` instead.
            min_similarity: Rows are kept only when their ``VectorDistance``
                score is strictly greater than this value.
            filter_parameters: Optional extra query parameters (dicts with
                ``name`` and ``value``) referenced by ``filters``.

        Returns:
            A list of rows with ``id``, ``content``, ``category`` and
            ``similarity``.
        """
        query = (
            "SELECT TOP @top_k "
            "c.id, c.content, c.category, "
            "VectorDistance(c.embedding, @queryVector) AS similarity "
            "FROM c "
            "WHERE VectorDistance(c.embedding, @queryVector) > @minSimilarity"
        )

        if filters:
            # Parenthesize so an OR inside the caller's predicate cannot
            # bypass the similarity threshold through operator precedence.
            query += f" AND ({filters})"

        query += " ORDER BY VectorDistance(c.embedding, @queryVector)"

        parameters = [
            {"name": "@top_k", "value": top_k},
            {"name": "@queryVector", "value": query_embedding},
            {"name": "@minSimilarity", "value": min_similarity},
        ]
        if filter_parameters:
            parameters.extend(filter_parameters)

        return await self._query(query, parameters)

    async def hybrid_search(
        self,
        text_query: str,
        query_embedding: list,
        top_k: int = 10,
        min_similarity: float = 0.7,
    ) -> list:
        """Combine a case-insensitive substring match with vector search.

        A row qualifies when its ``content`` contains ``text_query``
        (compared in lower case) or its ``VectorDistance`` score exceeds
        ``min_similarity``. The text side is a plain ``CONTAINS`` substring
        test, not a ranked full-text search.

        Args:
            text_query: Substring looked for in ``c.content``.
            query_embedding: Query vector compared against ``c.embedding``.
            top_k: Maximum number of rows to return.
            min_similarity: Score threshold for the vector side of the OR.

        Returns:
            A list of rows with ``id``, ``content``, ``vectorScore`` and
            ``textMatch``.
        """
        query = """
        SELECT TOP @top_k
            c.id, c.content,
            VectorDistance(c.embedding, @queryVector) AS vectorScore,
            CONTAINS(LOWER(c.content), LOWER(@textQuery)) AS textMatch
        FROM c
        WHERE CONTAINS(LOWER(c.content), LOWER(@textQuery))
           OR VectorDistance(c.embedding, @queryVector) > @minSimilarity
        ORDER BY VectorDistance(c.embedding, @queryVector)
        """

        parameters = [
            {"name": "@top_k", "value": top_k},
            {"name": "@queryVector", "value": query_embedding},
            {"name": "@textQuery", "value": text_query},
            {"name": "@minSimilarity", "value": min_similarity},
        ]

        return await self._query(query, parameters)

Cosmos DB vector search brings global scale and multi-model capabilities to AI applications.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.