Vector Databases for AI Applications
As AI applications explode, vector databases have become critical infrastructure. They enable similarity search at scale, powering recommendation systems, semantic search, and RAG applications. Let’s explore the landscape and implementation patterns.
Why Vector Databases?
Traditional databases optimize for exact matches. AI applications need similarity:
- “Find documents similar to this query”
- “Recommend products like ones this user bought”
- “Match this image to similar images”
Vectors (embeddings) represent meaning in high-dimensional space. Similar items have similar vectors.
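A quick sketch of what "similar vectors" means in practice: cosine similarity compares the angle between two embeddings, so related meanings score close to 1.0. The vectors below are toy 4-dimensional values, not real embeddings:

import numpy as np

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine of the angle between two vectors: 1.0 = same direction."""
    a, b = np.array(a), np.array(b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Toy "embeddings" (real ones have hundreds or thousands of dimensions)
doc_a = [0.9, 0.1, 0.0, 0.2]  # e.g., "data warehouse"
doc_b = [0.8, 0.2, 0.1, 0.3]  # e.g., "data lakehouse" - related concept
doc_c = [0.0, 0.9, 0.8, 0.1]  # e.g., unrelated content

print(cosine_similarity(doc_a, doc_b))  # high, ~0.98
print(cosine_similarity(doc_a, doc_c))  # low, ~0.10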
Vector Database Options
1. Pinecone
Managed, purpose-built for vectors:
import pinecone

# Note: this is the classic pinecone-client (v2) API;
# newer releases use `from pinecone import Pinecone` instead of init().
pinecone.init(api_key="your-key", environment="us-east1-gcp")

# Create index
pinecone.create_index(
    name="documents",
    dimension=1536,  # OpenAI embedding size
    metric="cosine",
    pod_type="p1"
)

index = pinecone.Index("documents")

# Upsert vectors as (id, values, metadata) tuples
index.upsert(vectors=[
    ("doc1", [0.1, 0.2, ...], {"title": "Azure Data Factory", "category": "data"}),
    ("doc2", [0.3, 0.4, ...], {"title": "Databricks", "category": "analytics"}),
])

# Query with a metadata filter
results = index.query(
    vector=[0.15, 0.25, ...],
    top_k=5,
    include_metadata=True,
    filter={"category": {"$eq": "data"}}
)

for match in results.matches:
    print(f"{match.id}: {match.score} - {match.metadata['title']}")
2. Weaviate
Open-source with built-in vectorization:
import weaviate

# Uses the weaviate-client v3 API; v4 introduced a different client interface
client = weaviate.Client(
    url="http://localhost:8080",
    additional_headers={
        "X-OpenAI-Api-Key": "your-openai-key"
    }
)

# Create schema with automatic vectorization
client.schema.create_class({
    "class": "Document",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text"
        }
    },
    "properties": [
        {"name": "title", "dataType": ["text"]},
        {"name": "content", "dataType": ["text"]},
        {"name": "category", "dataType": ["string"]}
    ]
})

# Add objects - vectors are generated automatically by the vectorizer module
client.data_object.create({
    "title": "Azure Synapse Analytics",
    "content": "Unified analytics platform combining data warehousing and big data...",
    "category": "analytics"
}, "Document")

# Query with natural language
result = client.query.get("Document", ["title", "content"]) \
    .with_near_text({"concepts": ["data lakehouse"]}) \
    .with_limit(5) \
    .do()
3. Qdrant
Open-source with advanced filtering:
from qdrant_client import QdrantClient
from qdrant_client.models import (
    Distance, VectorParams, PointStruct,
    Filter, FieldCondition, MatchValue, Range,
)

client = QdrantClient("localhost", port=6333)

# Create collection
client.create_collection(
    collection_name="documents",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
)

# Upsert points
client.upsert(
    collection_name="documents",
    points=[
        PointStruct(
            id=1,
            vector=[0.1, 0.2, ...],
            payload={"title": "Azure ML", "category": "ai", "year": 2023}
        ),
        PointStruct(
            id=2,
            vector=[0.3, 0.4, ...],
            payload={"title": "Cognitive Services", "category": "ai", "year": 2022}
        ),
    ]
)

# Search with filters (the Python client expects Filter model objects)
results = client.search(
    collection_name="documents",
    query_vector=[0.15, 0.25, ...],
    query_filter=Filter(
        must=[
            FieldCondition(key="category", match=MatchValue(value="ai")),
            FieldCondition(key="year", range=Range(gte=2022)),
        ]
    ),
    limit=5
)
4. Azure Cognitive Search
Microsoft's enterprise option with hybrid (keyword + vector) capabilities; the service has since been renamed Azure AI Search:
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswVectorSearchAlgorithmConfiguration,
)
from azure.core.credentials import AzureKeyCredential

# Create index with a vector field
index_client = SearchIndexClient(
    endpoint="https://your-search.search.windows.net",
    credential=AzureKeyCredential("your-key")
)

index = SearchIndex(
    name="documents",
    fields=[
        SearchField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="title", type=SearchFieldDataType.String, searchable=True),
        SearchField(name="content", type=SearchFieldDataType.String, searchable=True),
        SearchField(
            name="vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_configuration="default"
        ),
    ],
    vector_search=VectorSearch(
        algorithm_configurations=[
            HnswVectorSearchAlgorithmConfiguration(name="default")
        ]
    )
)
index_client.create_or_update_index(index)

# Search
search_client = SearchClient(
    endpoint="https://your-search.search.windows.net",
    index_name="documents",
    credential=AzureKeyCredential("your-key")
)

# Hybrid search (vector + keyword). This matches the 2023 preview SDK surface
# (azure-search-documents 11.4 betas); later versions moved to VectorizedQuery.
# query_embedding is the precomputed embedding of the query text.
results = search_client.search(
    search_text="data warehouse",  # Keyword search
    vector=query_embedding,        # Vector search
    top_k=10,
    vector_fields="vector",
    select=["id", "title", "content"]
)
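Results come back as an iterable of dicts keyed by the index's field names, plus metadata such as the relevance score, so consuming them is straightforward:

for result in results:
    print(result["id"], result["title"], result["@search.score"])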
Comparison Matrix
| Feature | Pinecone | Weaviate | Qdrant | Azure Search |
|---|---|---|---|---|
| Managed | Yes | Optional | Optional | Yes |
| Open Source | No | Yes | Yes | No |
| Built-in Embedding | No | Yes | No | No |
| Hybrid Search | Limited | Yes | Yes | Yes |
| Filtering | Yes | Yes | Advanced | Yes |
| Scale | Excellent | Good | Good | Excellent |
| Azure Integration | API | API | API | Native |
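"Hybrid search" in the matrix means fusing keyword and vector rankings into one result list. A common fusion method is reciprocal rank fusion (RRF); the sketch below follows the standard RRF formula with its usual constant k=60, not any specific product's implementation:

def reciprocal_rank_fusion(keyword_ids: list[str], vector_ids: list[str],
                           k: int = 60) -> list[str]:
    """Fuse two ranked ID lists: score(d) = sum over lists of 1 / (k + rank)."""
    scores: dict[str, float] = {}
    for ranking in (keyword_ids, vector_ids):
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

print(reciprocal_rank_fusion(["d1", "d2", "d3"], ["d2", "d4", "d1"]))
# ['d2', 'd1', 'd4', 'd3'] -- d1 and d2 appear in both lists, so they rank highest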
Architecture Patterns
Pattern 1: RAG with Vector Store
from dataclasses import dataclass
from typing import Protocol

class VectorStore(Protocol):
    def add(self, id: str, vector: list[float], metadata: dict): ...
    def search(self, vector: list[float], k: int) -> list[dict]: ...

@dataclass
class RAGConfig:
    vector_store: VectorStore
    embedding_model: str
    chat_model: str
    top_k: int = 5

class RAGApplication:
    def __init__(self, config: RAGConfig):
        self.config = config
        self.vector_store = config.vector_store

    def ingest(self, documents: list[dict]):
        """Ingest documents into vector store."""
        for doc in documents:
            embedding = self._get_embedding(doc["content"])
            self.vector_store.add(
                id=doc["id"],
                vector=embedding,
                metadata={
                    "title": doc.get("title"),
                    "source": doc.get("source")
                }
            )

    def query(self, question: str) -> dict:
        """Query with RAG."""
        # Get embedding for question
        query_vector = self._get_embedding(question)
        # Retrieve relevant documents
        results = self.vector_store.search(query_vector, self.config.top_k)
        # Build context
        context = "\n\n".join([r["content"] for r in results])
        # Generate response
        response = self._generate(question, context)
        return {
            "answer": response,
            "sources": [{"id": r["id"], "score": r["score"]} for r in results]
        }

    def _get_embedding(self, text: str) -> list[float]:
        # Implementation depends on embedding provider
        pass

    def _generate(self, question: str, context: str) -> str:
        # Implementation depends on LLM provider
        pass
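The two stubs are provider-specific. As one possible sketch, assuming the pre-1.0 openai package (post-1.0 clients use OpenAI().embeddings.create and chat.completions.create instead), a subclass might fill them in like this; OpenAIRAGApplication is a hypothetical name:

import openai  # assumes the pre-1.0 openai package

class OpenAIRAGApplication(RAGApplication):
    def _get_embedding(self, text: str) -> list[float]:
        resp = openai.Embedding.create(
            model=self.config.embedding_model,  # e.g. "text-embedding-ada-002"
            input=text,
        )
        return resp["data"][0]["embedding"]

    def _generate(self, question: str, context: str) -> str:
        resp = openai.ChatCompletion.create(
            model=self.config.chat_model,  # e.g. "gpt-3.5-turbo"
            messages=[
                {"role": "system", "content": "Answer using only the provided context."},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"},
            ],
        )
        return resp["choices"][0]["message"]["content"]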
Pattern 2: Multi-Vector Retrieval
For complex documents, use multiple vectors:
class MultiVectorDocument:
    """Document with multiple vector representations."""

    def __init__(self, doc_id: str, content: str):
        self.doc_id = doc_id
        self.content = content

    def get_vectors(self) -> dict[str, list[float]]:
        """Generate multiple vectors for different aspects."""
        return {
            "summary": self._embed(self._summarize(self.content)),
            "questions": self._embed(self._generate_questions(self.content)),
            "full": self._embed(self.content[:8000])  # Truncate for embedding
        }

    def _summarize(self, content: str) -> str:
        # Use an LLM to summarize
        pass

    def _generate_questions(self, content: str) -> str:
        # Use an LLM to generate potential questions
        pass

    def _embed(self, text: str) -> list[float]:
        # Get embedding
        pass

class MultiVectorRetriever:
    def __init__(self, vector_store, embed_fn):
        self.vector_store = vector_store
        self._embed = embed_fn  # Embedding function, injected

    def search(self, query: str, strategy: str = "summary") -> list[dict]:
        """Search using the specified vector type."""
        query_vector = self._embed(query)
        # Search against a specific vector type, stored as metadata
        results = self.vector_store.search(
            vector=query_vector,
            filter={"vector_type": strategy}
        )
        return results
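For the vector_type filter above to work, each representation must be stored as its own point, tagged with its type. A minimal ingestion sketch, assuming the VectorStore protocol from Pattern 1:

def ingest_multi_vector(store: VectorStore, doc: MultiVectorDocument):
    # Store each representation separately, tagged with its type
    for vector_type, vector in doc.get_vectors().items():
        store.add(
            id=f"{doc.doc_id}:{vector_type}",
            vector=vector,
            metadata={"doc_id": doc.doc_id, "vector_type": vector_type},
        )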
Pattern 3: Tiered Retrieval
Coarse-to-fine retrieval for large datasets:
class TieredRetriever:
    """Two-stage retrieval for efficiency."""

    def __init__(self, coarse_store, fine_store, embed_fn):
        self.coarse_store = coarse_store  # Fewer, summarized vectors
        self.fine_store = fine_store      # Full document vectors
        self._embed = embed_fn            # Embedding function, injected

    def search(self, query: str, top_k: int = 5) -> list[dict]:
        query_vector = self._embed(query)

        # Stage 1: Coarse retrieval (fast)
        coarse_results = self.coarse_store.search(
            vector=query_vector,
            k=top_k * 10  # Get more candidates than needed
        )

        # Get document IDs from coarse results
        candidate_ids = [r["doc_id"] for r in coarse_results]

        # Stage 2: Fine retrieval (accurate); filter syntax is store-specific
        fine_results = self.fine_store.search(
            vector=query_vector,
            filter={"doc_id": {"$in": candidate_ids}},
            k=top_k
        )
        return fine_results
Performance Optimization
class VectorStoreOptimizer:
    """Optimize vector store operations."""

    def __init__(self, store):
        self.store = store
        self.cache = {}

    def batch_upsert(self, vectors: list[tuple], batch_size: int = 100):
        """Batch upsert for efficiency."""
        for i in range(0, len(vectors), batch_size):
            batch = vectors[i:i + batch_size]
            self.store.upsert(batch)

    def cached_search(self, query_hash: str, query_vector: list[float], k: int) -> list:
        """Cache frequent queries (unbounded here; use an LRU cache in production)."""
        if query_hash in self.cache:
            return self.cache[query_hash]
        results = self.store.search(query_vector, k)
        self.cache[query_hash] = results
        return results

    def quantize_vectors(self, vectors: list[list[float]], bits: int = 8) -> list[list[int]]:
        """Reduce vector size with scalar quantization.

        Note: min_val and scale must be kept to dequantize later.
        """
        import numpy as np
        vectors_np = np.array(vectors)
        min_val, max_val = vectors_np.min(), vectors_np.max()
        # Scale to the integer range [0, 2^bits - 1]
        scale = (2 ** bits - 1) / (max_val - min_val)
        quantized = np.round((vectors_np - min_val) * scale).astype(int)
        return quantized.tolist()
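A usage sketch for the query cache; store and embed below stand in for your vector store client and embedding function (both assumptions, not part of the class):

import hashlib

optimizer = VectorStoreOptimizer(store)  # store: any client exposing upsert/search

query_text = "data lakehouse architecture"
query_vector = embed(query_text)  # embed: your embedding function (assumed)

# A stable hash of the query text keys the cache, so repeated queries hit it
query_hash = hashlib.sha256(query_text.encode()).hexdigest()
results = optimizer.cached_search(query_hash, query_vector, k=5)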
Vector databases are the foundation for modern AI applications. Choose based on your scale, filtering needs, and Azure integration requirements. For most Azure-centric applications, start with Azure Cognitive Search for its native integration, then evaluate specialized options as needs grow.