Skip to content
Back to Blog
2 min read

Vector Databases for AI Applications

I wrote “Vector Databases for AI Applications” to share practical, production-minded guidance on this topic.

Why Vector Databases?

Traditional databases optimize for exact matches. AI applications need similarity:

  • “Find documents similar to this query”
  • “Recommend products like ones this user bought”
  • “Match this image to similar images”

Vectors (embeddings) represent meaning in high-dimensional space. Similar items have similar vectors.

Vector Database Options

1. Pinecone

Managed, purpose-built for vectors:

import pinecone

# Configure the client; the key and environment values shown are placeholders.
pinecone.init(api_key="your-key", environment="us-east1-gcp")

# Create index
pinecone.create_index(
    name="documents",
    dimension=1536,  # OpenAI embedding size
    metric="cosine",
    pod_type="p1"
)

# Handle to the "documents" index, used for the upsert and query calls below.
index = pinecone.Index("documents")

# Upsert vectors
# Each entry is a tuple of (id, vector, metadata dict).
# NOTE(review): the `...` inside each vector is a placeholder (Python's
# Ellipsis), not data - real vectors presumably need 1536 floats to match the
# `dimension` the index was created with.
index.upsert(vectors=[
    ("doc1", [0.1, 0.2, ...], {"title": "Azure Data Factory", "category": "data"}),
    ("doc2", [0.3, 0.4, ...], {"title": "Databricks", "category": "analytics"}),
])

# Query
# Asks for the top 5 matches, with metadata included, restricted by a metadata
# filter to entries whose "category" equals "data".
results = index.query(
    vector=[0.15, 0.25, ...],
    top_k=5,
    include_metadata=True,
    filter={"category": {"$eq": "data"}}
)

# Print id, score, and stored title for every match.
# metadata['title'] is readable here because include_metadata=True was passed.
for match in results.matches:
    print(f"{match.id}: {match.score} - {match.metadata['title']}")

2. Weaviate

Open-source with built-in vectorization:

import weaviate

# Connect to a Weaviate instance at localhost:8080. The OpenAI key (a
# placeholder here) is sent as a request header.
# NOTE(review): presumably the text2vec-openai vectorizer configured below
# uses this key - confirm against the Weaviate module docs.
client = weaviate.Client(
    url="http://localhost:8080",
    additional_headers={
        "X-OpenAI-Api-Key": "your-openai-key"
    }
)

# Create schema with automatic vectorization
client.schema.create_class({
    "class": "Document",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text"
        }
    },
    # NOTE(review): "category" is declared as "string" while the other two
    # properties use "text" - confirm the difference is intentional.
    "properties": [
        {"name": "title", "dataType": ["text"]},
        {"name": "content", "dataType": ["text"]},
        {"name": "category", "dataType": ["string"]}
    ]
})

# Add objects - vectors generated automatically
# No vector is supplied in this call; only the property values and the class
# name ("Document") are passed.
client.data_object.create({
    "title": "Azure Synapse Analytics",
    "content": "Unified analytics platform combining data warehousing and big data...",
    "category": "analytics"
}, "Document")

# Query with natural language
# Requests the "title" and "content" properties of up to 5 Document objects
# for the concept "data lakehouse".
result = client.query.get("Document", ["title", "content"]) \
    .with_near_text({"concepts": ["data lakehouse"]}) \
    .with_limit(5) \
    .do()

3. Qdrant

Open-source with advanced filtering:

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

# Connect to a Qdrant server on localhost, port 6333.
client = QdrantClient("localhost", port=6333)

# Create collection
# Vectors are declared with size 1536 and cosine distance.
client.create_collection(
    collection_name="documents",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
)

# Upsert points
# Each point carries an integer id, a vector, and a payload dict; the payload
# keys ("category", "year") are what the search filter below refers to.
# NOTE(review): the `...` inside each vector is a placeholder (Python's
# Ellipsis) - real vectors presumably need 1536 floats to match the
# collection's configured size.
client.upsert(
    collection_name="documents",
    points=[
        PointStruct(
            id=1,
            vector=[0.1, 0.2, ...],
            payload={"title": "Azure ML", "category": "ai", "year": 2023}
        ),
        PointStruct(
            id=2,
            vector=[0.3, 0.4, ...],
            payload={"title": "Cognitive Services", "category": "ai", "year": 2022}
        ),
    ]
)

# Search with filters
# The filter lists two conditions under "must": category matches "ai" and
# year is >= 2022. At most 5 results are requested.
results = client.search(
    collection_name="documents",
    query_vector=[0.15, 0.25, ...],
    query_filter={
        "must": [
            {"key": "category", "match": {"value": "ai"}},
            {"key": "year", "range": {"gte": 2022}}
        ]
    },
    limit=5
)

4. Azure Cognitive Search

Enterprise option with hybrid capabilities:

from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswVectorSearchAlgorithmConfiguration,
)
from azure.core.credentials import AzureKeyCredential

# Create index with vector field
# The endpoint and key are placeholders for a real search service.
index_client = SearchIndexClient(
    endpoint="https://your-search.search.windows.net",
    credential=AzureKeyCredential("your-key")
)

# Index definition: a string key field ("id"), two searchable string fields,
# and one vector field of 1536 single-precision values.
index = SearchIndex(
    name="documents",
    fields=[
        SearchField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="title", type=SearchFieldDataType.String, searchable=True),
        SearchField(name="content", type=SearchFieldDataType.String, searchable=True),
        SearchField(
            name="vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=1536,
            # Refers by name to the HNSW configuration declared below.
            vector_search_configuration="default"
        ),
    ],
    vector_search=VectorSearch(
        algorithm_configurations=[
            HnswVectorSearchAlgorithmConfiguration(name="default")
        ]
    )
)

index_client.create_or_update_index(index)

# Search
# A separate client bound to the "documents" index, same endpoint and key.
search_client = SearchClient(
    endpoint="https://your-search.search.windows.net",
    index_name="documents",
    credential=AzureKeyCredential("your-key")
)

# Hybrid search (vector + keyword)
# NOTE(review): `query_embedding` is not defined anywhere in this snippet; it
# must be computed beforehand - presumably a 1536-float embedding of the query
# text, to match the "vector" field's dimensions.
results = search_client.search(
    search_text="data warehouse",  # Keyword search
    vector=query_embedding,         # Vector search
    top_k=10,
    vector_fields="vector",
    select=["id", "title", "content"]
)

Comparison Matrix

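| Feature            | Pinecone  | Weaviate | Qdrant   | Azure Search |
|--------------------|-----------|----------|----------|--------------|
| Managed            | Yes       | Optional | Optional | Yes          |
| Open Source        | No        | Yes      | Yes      | No           |
| Built-in Embedding | No        | Yes      | No       | No           |
| Hybrid Search      | Limited   | Yes      | Yes      | Yes          |
| Filtering          | Yes       | Yes      | Advanced | Yes          |
| Scale              | Excellent | Good     | Good     | Excellent    |
| Azure Integration  | API       | API      | API      | Native       |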

Architecture Patterns

Pattern 1: RAG with Vector Store

from dataclasses import dataclass
from typing import Protocol

class VectorStore(Protocol):
    """Structural interface for the vector store used by RAGApplication.

    Any object with matching ``add`` and ``search`` methods satisfies it; no
    inheritance is required.
    """

    # Store one entry. RAGApplication.ingest calls this with keyword
    # arguments: the document id, its embedding, and a metadata dict.
    def add(self, id: str, vector: list[float], metadata: dict): ...
    # Return hits for `vector` as a list of dicts. RAGApplication.query passes
    # its configured top_k as `k` and reads "content", "id" and "score" from
    # every returned dict.
    def search(self, vector: list[float], k: int) -> list[dict]: ...

@dataclass
class RAGConfig:
    """Settings bundle handed to RAGApplication."""

    # Store that RAGApplication adds documents to and searches.
    vector_store: VectorStore
    # Embedding model name.
    # NOTE(review): not read by any code in this snippet - presumably meant
    # for a concrete _get_embedding implementation.
    embedding_model: str
    # Chat model name.
    # NOTE(review): not read by any code in this snippet - presumably meant
    # for a concrete _generate implementation.
    chat_model: str
    # Passed as `k` to vector_store.search by RAGApplication.query.
    top_k: int = 5

class RAGApplication:
    """Retrieval-augmented generation pipeline on top of a vector store.

    The provider-specific steps are left to subclasses: override
    ``_get_embedding`` and ``_generate``. Until they are overridden, both
    raise ``NotImplementedError`` instead of silently returning ``None``
    (which would otherwise be stored as a vector or returned as an answer).
    """

    def __init__(self, config: RAGConfig):
        """Keep the config and a direct reference to its vector store."""
        self.config = config
        self.vector_store = config.vector_store

    def ingest(self, documents: list[dict]):
        """Ingest documents into vector store.

        Args:
            documents: Dicts with required ``"id"`` and ``"content"`` keys
                and optional ``"title"`` and ``"source"`` keys (stored as
                ``None`` when absent).

        Raises:
            KeyError: If a document lacks ``"id"`` or ``"content"``.
        """
        for doc in documents:
            content = doc["content"]
            embedding = self._get_embedding(content)
            self.vector_store.add(
                id=doc["id"],
                vector=embedding,
                metadata={
                    "title": doc.get("title"),
                    "source": doc.get("source"),
                    # query() builds its context from each hit's "content".
                    # The store only ever receives id, vector and metadata,
                    # so the text has to travel in the metadata to be
                    # retrievable at all.
                    "content": content,
                },
            )

    def query(self, question: str) -> dict:
        """Query with RAG.

        Embeds the question, retrieves ``config.top_k`` hits, joins their
        texts into one context string, and asks ``_generate`` for an answer.

        Returns:
            ``{"answer": <generated text>, "sources": [{"id", "score"}, ...]}``
            with one source entry per retrieved hit.

        Raises:
            KeyError: If a hit lacks ``"content"``, ``"id"`` or ``"score"``.
        """
        # Get embedding for question
        query_vector = self._get_embedding(question)

        # Retrieve relevant documents
        # NOTE(review): assumes the store returns each hit as a flat dict
        # exposing "id", "score" and the stored "content" - confirm for the
        # concrete store in use.
        results = self.vector_store.search(query_vector, self.config.top_k)

        # Build context: hit texts separated by blank lines
        context = "\n\n".join(r["content"] for r in results)

        # Generate response
        response = self._generate(question, context)

        return {
            "answer": response,
            "sources": [{"id": r["id"], "score": r["score"]} for r in results],
        }

    def _get_embedding(self, text: str) -> list[float]:
        """Return the embedding of ``text``; implementation depends on the embedding provider."""
        raise NotImplementedError(
            "override _get_embedding() with a call to your embedding provider"
        )

    def _generate(self, question: str, context: str) -> str:
        """Return an answer to ``question`` given ``context``; implementation depends on the LLM provider."""
        raise NotImplementedError(
            "override _generate() with a call to your LLM provider"
        )

Pattern 2: Multi-Vector Retrieval

For complex documents, use multiple vectors:

class MultiVectorDocument:
    """Document with multiple vector representations.

    The LLM and embedding steps are provider-specific: override
    ``_summarize``, ``_generate_questions`` and ``_embed`` in a subclass.
    Until then they raise ``NotImplementedError`` rather than returning
    ``None``, so ``get_vectors`` cannot silently produce a dict of ``None``.
    """

    # Upper bound, in characters, on the text sent to the embedder for the
    # "full" vector. Subclasses may override it.
    MAX_EMBED_CHARS = 8000

    def __init__(self, doc_id: str, content: str):
        self.doc_id = doc_id
        self.content = content

    def get_vectors(self) -> dict[str, list[float]]:
        """Generate multiple vectors for different aspects.

        Returns:
            A dict with three embeddings: ``"summary"`` (of the summarized
            content), ``"questions"`` (of the generated questions) and
            ``"full"`` (of the content cut to ``MAX_EMBED_CHARS`` characters).
        """
        return {
            "summary": self._embed(self._summarize(self.content)),
            "questions": self._embed(self._generate_questions(self.content)),
            # Truncate for embedding; the cut is by characters, not tokens.
            "full": self._embed(self.content[:self.MAX_EMBED_CHARS]),
        }

    def _summarize(self, content: str) -> str:
        """Return a summary of ``content`` (use an LLM to summarize)."""
        raise NotImplementedError("override _summarize() with an LLM call")

    def _generate_questions(self, content: str) -> str:
        """Return potential questions about ``content`` (use an LLM to generate them)."""
        raise NotImplementedError("override _generate_questions() with an LLM call")

    def _embed(self, text: str) -> list[float]:
        """Return the embedding of ``text``."""
        raise NotImplementedError("override _embed() with an embedding call")

class MultiVectorRetriever:
    """Search a vector store against one chosen vector representation.

    ``search`` needs two things the class previously never defined: a
    ``vector_store`` attribute and an ``_embed`` method. Both now exist, so
    a subclass only has to override ``_embed``.
    """

    def __init__(self, vector_store=None):
        """Remember the store to search.

        ``vector_store`` is optional only so that no-argument construction
        (followed by assigning the attribute) keeps working; ``search``
        requires it to be set.
        """
        self.vector_store = vector_store

    def search(self, query: str, strategy: str = "summary") -> list[dict]:
        """Search using specified vector type.

        Args:
            query: Text to embed and search for.
            strategy: Value matched against each entry's ``"vector_type"``.

        Returns:
            Whatever the store's ``search`` returns for the filtered query.
        """
        query_vector = self._embed(query)

        # Search against specific vector type.
        # NOTE(review): no result limit is passed here, so the store's own
        # default applies.
        return self.vector_store.search(
            vector=query_vector,
            filter={"vector_type": strategy},
        )

    def _embed(self, text: str) -> list[float]:
        """Return the embedding of ``text``; provider-specific, override in a subclass."""
        raise NotImplementedError("override _embed() with an embedding call")

Pattern 3: Tiered Retrieval

Coarse-to-fine retrieval for large datasets:

class TieredRetriever:
    """Two-stage retrieval for efficiency.

    Stage 1 queries ``coarse_store`` for a wide candidate set; stage 2
    queries ``fine_store`` restricted to those candidates. Override
    ``_embed`` in a subclass to supply the embedding call.
    """

    def __init__(self, coarse_store, fine_store):
        self.coarse_store = coarse_store  # Fewer, summarized vectors
        self.fine_store = fine_store       # Full document vectors

    def search(self, query: str, top_k: int = 5) -> list[dict]:
        """Return up to ``top_k`` fine-grained hits for ``query``.

        Args:
            query: Text to embed and search for.
            top_k: Number of final results requested from the fine store;
                the coarse stage asks for ten times as many candidates.

        Returns:
            The fine store's results, or ``[]`` when the coarse stage finds
            no candidates.

        Raises:
            KeyError: If a coarse hit lacks ``"doc_id"``.
        """
        query_vector = self._embed(query)

        # Stage 1: Coarse retrieval (fast)
        coarse_results = self.coarse_store.search(
            vector=query_vector,
            k=top_k * 10,  # Get more candidates
        )

        # Collect candidate ids once each, keeping first-seen order, so the
        # "$in" filter does not carry duplicates.
        candidate_ids = list(dict.fromkeys(r["doc_id"] for r in coarse_results))

        # Nothing to refine: skip the second query instead of sending an
        # empty "$in" filter to the fine store.
        if not candidate_ids:
            return []

        # Stage 2: Fine retrieval (accurate)
        return self.fine_store.search(
            vector=query_vector,
            filter={"doc_id": {"$in": candidate_ids}},
            k=top_k,
        )

    def _embed(self, text: str) -> list[float]:
        """Return the embedding of ``text``; provider-specific, override in a subclass."""
        raise NotImplementedError("override _embed() with an embedding call")

Performance Optimization

class VectorStoreOptimizer:
    """Optimize vector store operations.

    Wraps a store that provides ``upsert(batch)`` and ``search(vector, k)``
    and adds batched writes, a search-result cache, and scalar quantization.
    """

    def __init__(self, store):
        self.store = store
        # Maps (query_hash, k) -> results returned by an earlier search.
        # The cache is unbounded and never invalidated.
        self.cache = {}

    def batch_upsert(self, vectors: list[tuple], batch_size: int = 100):
        """Batch upsert for efficiency.

        Sends ``vectors`` to the store in consecutive slices of at most
        ``batch_size`` items.

        Raises:
            ValueError: If ``batch_size`` is less than 1 (a negative size
                used to upsert nothing without any error).
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        for start in range(0, len(vectors), batch_size):
            self.store.upsert(vectors[start:start + batch_size])

    def cached_search(self, query_hash: str, query_vector: list[float], k: int) -> list:
        """Cache frequent queries.

        Results are cached per ``(query_hash, k)``. Keying on the hash alone
        would return a result list of the wrong length when the same query
        is repeated with a different ``k``.
        """
        cache_key = (query_hash, k)
        if cache_key not in self.cache:
            self.cache[cache_key] = self.store.search(query_vector, k)
        return self.cache[cache_key]

    def quantize_vectors(self, vectors: list[list[float]], bits: int = 8) -> list[list[int]]:
        """Reduce vector size with quantization.

        Linearly maps every value from the global ``[min, max]`` range of
        ``vectors`` onto the integers ``0 .. 2**bits - 1``, rounding to the
        nearest integer.

        Args:
            vectors: Equal-length float vectors.
            bits: Width of the target integer range; must be at least 1.

        Returns:
            The quantized vectors as nested lists of ints. Empty input is
            returned empty; input whose values are all equal maps to zeros.

        Raises:
            ValueError: If ``bits`` is less than 1.
        """
        import numpy as np

        if bits < 1:
            raise ValueError("bits must be a positive integer")

        vectors_np = np.asarray(vectors, dtype=float)

        # min()/max() raise on an empty array, so handle it up front.
        if vectors_np.size == 0:
            return vectors_np.astype(int).tolist()

        min_val, max_val = vectors_np.min(), vectors_np.max()
        span = max_val - min_val

        # All values equal: there is no range to scale (and dividing by the
        # zero span would produce inf/nan), so everything maps to level 0.
        if span == 0:
            return np.zeros(vectors_np.shape, dtype=int).tolist()

        # Scale to integer range; round to nearest rather than truncate so
        # the top of the range is not lost to floating-point error.
        scale = (2 ** bits - 1) / span
        quantized = np.rint((vectors_np - min_val) * scale).astype(int)

        return quantized.tolist()

Vector databases are the foundation for modern AI applications. Choose based on your scale, filtering needs, and Azure integration requirements. For most Azure-centric applications, start with Azure Cognitive Search for its native integration, then evaluate specialized options as needs grow.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.

Michael John Pena

Michael John Pena

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.