Skip to content
Back to Blog
2 min read

Building Semantic Search with Azure OpenAI Embeddings

This post shares practical, production-minded guidance on building semantic search with Azure OpenAI embeddings.

What Are Embeddings?

Embeddings are numerical representations of text that capture semantic meaning. Similar concepts have similar vectors. Azure OpenAI provides the text-embedding-ada-002 model, which creates 1536-dimensional vectors.

import openai
from azure.identity import DefaultAzureCredential

# Configure Azure OpenAI (module-level settings on the openai package).
# The endpoint and key below are placeholders; replace them with your own
# resource values and avoid committing a real key to source control.
openai.api_type = "azure"
openai.api_base = "https://your-resource.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
# NOTE(review): DefaultAzureCredential is imported above but not used here;
# authentication in this snippet is by API key only.
openai.api_key = "your-api-key"

def get_embedding(text: str, engine: str = "text-embedding-ada-002") -> list[float]:
    """Generate an embedding for a single text using Azure OpenAI.

    Args:
        text: The text to embed.
        engine: Name of the Azure OpenAI embedding deployment to call.
            Defaults to "text-embedding-ada-002"; pass your own deployment
            name if it differs.

    Returns:
        The embedding vector taken from the first entry of the response.
    """
    response = openai.Embedding.create(input=text, engine=engine)
    # Only one text is sent, so the first entry is the one we want.
    return response['data'][0]['embedding']

# Example usage
# Each get_embedding call sends one request to the embedding deployment.
doc_embedding = get_embedding("Azure Data Factory is a cloud ETL service")
query_embedding = get_embedding("How do I move data in Azure?")

Computing Similarity

Cosine similarity measures how similar two vectors are:

import numpy as np

def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector.
        vec2: Second vector, of the same length as vec1.

    Returns:
        The cosine of the angle between the vectors as a plain float.
        Returns 0.0 when either vector has zero magnitude, because the
        angle is undefined in that case.

    Raises:
        ValueError: If the vectors have different lengths (raised by np.dot).
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard against division by zero, which would otherwise yield NaN and a
    # runtime warning and then poison any ranking that sorts on the score.
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)

# Compare query to document
# query_embedding and doc_embedding come from the get_embedding example earlier.
similarity = cosine_similarity(query_embedding, doc_embedding)
print(f"Similarity: {similarity:.4f}")  # Higher = more similar

Building a Simple Search Index

For a proof of concept, we can store embeddings in memory:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Document:
    """A piece of indexed text together with its embedding vector."""

    # Caller-supplied identifier for the document.
    id: str
    # Raw text of the document.
    content: str
    # Embedding of the content; None until one has been assigned.
    embedding: Optional[list[float]] = None

class SemanticSearchIndex:
    """In-memory semantic search index intended for proofs of concept.

    Documents are embedded when they are added; a search embeds the query
    and scores it against every stored document.
    """

    def __init__(self):
        self.documents: list[Document] = []

    def add_document(self, doc_id: str, content: str):
        """Embed content and store it under doc_id.

        Args:
            doc_id: Identifier to store the document under.
            content: Text to embed and keep alongside the embedding.
        """
        self.documents.append(
            Document(id=doc_id, content=content, embedding=get_embedding(content))
        )

    def search(self, query: str, top_k: int = 5) -> list[tuple[Document, float]]:
        """Search for the documents most similar to the query.

        Args:
            query: Free-text query to embed and compare against the index.
            top_k: Maximum number of results to return.

        Returns:
            Up to top_k (document, similarity) pairs, most similar first.
            Returns an empty list when top_k is not positive or the index
            holds no documents.
        """
        # Bail out before paying for a query embedding that cannot be used;
        # this also stops a negative top_k from slicing off the tail.
        if top_k <= 0 or not self.documents:
            return []

        query_embedding = get_embedding(query)

        # Documents without an embedding cannot be scored, so skip them.
        results = [
            (doc, cosine_similarity(query_embedding, doc.embedding))
            for doc in self.documents
            if doc.embedding is not None
        ]

        # Sort by similarity descending
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results[:top_k]

# Usage
# Each add_document call embeds the content before storing it.
index = SemanticSearchIndex()
index.add_document("doc1", "Azure Synapse Analytics provides unified analytics")
index.add_document("doc2", "Databricks is a lakehouse platform")
index.add_document("doc3", "Power BI creates interactive dashboards")

# search returns (Document, score) pairs, best match first.
results = index.search("data visualization tool")
for doc, score in results:
    # Show at most the first 50 characters of each document's content.
    print(f"{doc.id}: {score:.4f} - {doc.content[:50]}...")

For production, use Azure Cognitive Search with vector support:

from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    VectorSearch,
    HnswVectorSearchAlgorithmConfiguration,
)
from azure.core.credentials import AzureKeyCredential

# Create index with vector field
# Bound to search_index (not index) so it does not overwrite the in-memory
# SemanticSearchIndex named `index`, which later snippets still call
# add_document on.
# NOTE(review): this only builds the index definition; presumably it still
# has to be submitted through SearchIndexClient (imported above but unused)
# before the service knows about it - confirm.
search_index = SearchIndex(
    name="documents",
    fields=[
        SearchField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="content", type=SearchFieldDataType.String, searchable=True),
        SearchField(
            name="contentVector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            # Must match the embedding size (1536 for text-embedding-ada-002).
            vector_search_dimensions=1536,
            # Must match the algorithm configuration name declared below.
            vector_search_configuration="my-vector-config"
        ),
    ],
    vector_search=VectorSearch(
        algorithm_configurations=[
            HnswVectorSearchAlgorithmConfiguration(name="my-vector-config")
        ]
    )
)

# Search with vector
# The endpoint and key are placeholders for your own search service values.
search_client = SearchClient(
    endpoint="https://your-search.search.windows.net",
    index_name="documents",
    credential=AzureKeyCredential("your-key")
)

# Pure vector query: no keyword text, only the query embedding matched
# against the contentVector field.
# NOTE(review): the vector / top_k / vector_fields keyword arguments look
# like a preview release of azure-search-documents - confirm against the
# SDK version in use.
results = search_client.search(
    search_text=None,
    vector=query_embedding,
    top_k=10,
    vector_fields="contentVector"
)

Chunking Strategies

Large documents need to be split into chunks:

def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    """Split text into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between consecutive chunks; must be
            non-negative and smaller than chunk_size.

    Returns:
        The chunks in document order. Empty or whitespace-only text yields
        an empty list.

    Raises:
        ValueError: If chunk_size is not positive, or overlap is negative
            or not smaller than chunk_size.
    """
    # Without these checks the window step could be zero (a cryptic range()
    # error) or negative (an empty result that silently drops the text).
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))

        # This window already reached the end of the text; a further window
        # would only repeat words that are covered here.
        if start + chunk_size >= len(words):
            break

    return chunks

# Chunk and embed a large document
document = "..." # Your large document
chunks = chunk_text(document)

# Index every chunk under an id derived from its position.
# `index` must be a SemanticSearchIndex here, since it provides add_document.
for i, chunk in enumerate(chunks):
    index.add_document(f"doc1_chunk{i}", chunk)

Combine semantic and keyword search for best results:

def hybrid_search(
    query: str,
    index: "SemanticSearchIndex",
    keyword_weight: float = 0.3,
    top_k: int = 10,
    candidate_k: int = 20,
) -> list[tuple[str, float]]:
    """Combine semantic and keyword search.

    The semantic search supplies the candidate documents; each candidate's
    score is then blended with the fraction of query terms that appear in
    its content.

    Args:
        query: Free-text query.
        index: Index providing search(query, top_k=...) that returns
            (document, score) pairs. The annotation is quoted so it is not
            evaluated when the function is defined.
        keyword_weight: Weight of the keyword score in the blend; the
            semantic score gets 1 - keyword_weight.
        top_k: Maximum number of results to return.
        candidate_k: Number of semantic candidates to re-score.

    Returns:
        Up to top_k (document id, combined score) pairs, best first.
    """
    # Simple keyword matching on lower-cased, whitespace-separated terms
    query_terms = set(query.lower().split())

    # Semantic results; never fetch fewer candidates than will be returned.
    semantic_results = index.search(query, top_k=max(candidate_k, top_k))

    # Combine scores. Keyword overlap is computed only for the candidates,
    # since documents outside the semantic results are never returned.
    combined = {}
    for doc, semantic_score in semantic_results:
        if query_terms:
            doc_terms = set(doc.content.lower().split())
            keyword_score = len(query_terms & doc_terms) / len(query_terms)
        else:
            keyword_score = 0
        combined[doc.id] = (
            (1 - keyword_weight) * semantic_score +
            keyword_weight * keyword_score
        )

    # Sort and return
    ranked = sorted(combined.items(), key=lambda item: item[1], reverse=True)
    return ranked[:max(top_k, 0)]

Cost Optimization

Embeddings cost ~$0.0004 per 1K tokens. Strategies to optimize:

  1. Cache embeddings: Store and reuse document embeddings
  2. Batch requests: Embed multiple texts in one API call
  3. Pre-compute: Generate embeddings during ingestion, not query time
def batch_embed(
    texts: list[str],
    batch_size: int = 100,
    engine: str = "text-embedding-ada-002",
) -> list[list[float]]:
    """Embed multiple texts efficiently, one API call per batch.

    Args:
        texts: Texts to embed.
        batch_size: Number of texts sent per request; must be positive.
        engine: Name of the Azure OpenAI embedding deployment to call.

    Returns:
        One embedding per input text, in the same order as texts.

    Raises:
        ValueError: If batch_size is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    embeddings = []

    # NOTE(review): Azure OpenAI may cap the number of inputs per request;
    # confirm the limit for your API version before relying on the default.
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        response = openai.Embedding.create(input=batch, engine=engine)
        # Order by each item's index so every embedding lines up with its
        # input text rather than relying on the response order.
        ordered = sorted(response['data'], key=lambda item: item['index'])
        embeddings.extend(item['embedding'] for item in ordered)

    return embeddings

Real-World Application

I’ve been using semantic search for internal documentation at clients. Results are dramatically better than keyword search - users find answers even when they don’t know the exact terminology.

Next steps to explore:

  • Adding metadata filtering
  • Re-ranking with cross-encoders
  • Integrating with RAG patterns

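The combination of Azure OpenAI embeddings and Azure Cognitive Search provides a production-ready semantic search foundation.

Takeaways

Start with the in-memory index to prove the idea, then move to Azure Cognitive Search for production. Chunk large documents with some overlap, blend semantic and keyword scores, and keep costs down by caching, batching, and pre-computing embeddings at ingestion time.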

Michael John Pena

Michael John Pena

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.