Skip to content
Back to Blog
1 min read

Semantic Search for Enterprise: Beyond Keyword Matching

I wrote “Semantic Search for Enterprise: Beyond Keyword Matching” to share practical, production-minded guidance on this topic.

Enterprise Semantic Search Architecture

import json
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Dict, List, Optional

import numpy as np

@dataclass
class SearchResult:
    """One hit produced by ``EnterpriseSemanticSearch._parse_results``."""

    document_id: str  # value of the hit's "id" field
    title: str
    content: str
    score: float  # value of the hit's "@search.score"
    metadata: Dict
    highlights: List[str]  # highlighted "content" fragments; empty when none


class EnterpriseSemanticSearch:
    """Enterprise-grade semantic search system.

    Wraps an Azure AI Search ``SearchClient`` and offers keyword, vector
    ("semantic") and hybrid queries against a single index.

    ``config`` must provide ``search_endpoint``, ``index_name``,
    ``search_key``, ``embedding_client`` and ``llm_client``.
    """

    # Supported keys of a range filter, mapped to their OData operators.
    _RANGE_OPERATORS = (("gt", "gt"), ("gte", "ge"), ("lt", "lt"), ("lte", "le"))

    # Separator used inside ``search.in``; values containing it use an
    # ``or`` chain instead.
    _IN_DELIMITER = "|"

    def __init__(self, config: dict):
        self.config = config
        self._init_services()

    def _init_services(self):
        """Create the search client and pick up the injected AI clients."""
        # Imported here so the module itself imports without the Azure SDK.
        from azure.search.documents import SearchClient
        from azure.core.credentials import AzureKeyCredential

        self.search_client = SearchClient(
            endpoint=self.config["search_endpoint"],
            index_name=self.config["index_name"],
            credential=AzureKeyCredential(self.config["search_key"])
        )

        self.embedding_client = self.config["embedding_client"]
        self.llm_client = self.config["llm_client"]

    async def search(
        self,
        query: str,
        filters: Optional[Dict] = None,
        top_k: int = 10,
        search_type: str = "hybrid"
    ) -> List[SearchResult]:
        """Run a query and return the parsed hits.

        Args:
            query: Free-text query.
            filters: Optional field filters, see ``_build_filter``.
            top_k: Maximum number of hits requested from the index.
            search_type: ``"semantic"`` or ``"keyword"``; any other value
                (including the default ``"hybrid"``) runs a hybrid query.
        """
        if search_type == "semantic":
            return await self._semantic_search(query, top_k, filters)
        if search_type == "keyword":
            return await self._keyword_search(query, top_k, filters)
        return await self._hybrid_search(query, top_k, filters)

    async def _semantic_search(
        self,
        query: str,
        top_k: int,
        filters: Optional[Dict]
    ) -> List[SearchResult]:
        """Pure vector search using the query embedding."""
        query_embedding = await self._get_embedding(query)

        results = self.search_client.search(
            # None (rather than "") keeps the text side of the query out of
            # the request, as the SDK documents for vector-only search.
            search_text=None,
            vector_queries=[self._vector_query(query_embedding, top_k)],
            filter=self._filter_expression(filters),
            select=["id", "title", "content", "metadata"],
            top=top_k
        )

        return self._parse_results(results)

    async def _keyword_search(
        self,
        query: str,
        top_k: int,
        filters: Optional[Dict]
    ) -> List[SearchResult]:
        """Traditional keyword search with highlights on ``content``."""
        results = self.search_client.search(
            search_text=query,
            filter=self._filter_expression(filters),
            select=["id", "title", "content", "metadata"],
            highlight_fields="content",
            top=top_k
        )

        return self._parse_results(results)

    async def _hybrid_search(
        self,
        query: str,
        top_k: int,
        filters: Optional[Dict]
    ) -> List[SearchResult]:
        """Hybrid search combining the keyword query and the vector query."""
        query_embedding = await self._get_embedding(query)

        results = self.search_client.search(
            search_text=query,
            # Ask for twice as many neighbours as hits that are returned.
            vector_queries=[self._vector_query(query_embedding, top_k * 2)],
            filter=self._filter_expression(filters),
            select=["id", "title", "content", "metadata"],
            highlight_fields="content",
            top=top_k
        )

        return self._parse_results(results)

    @staticmethod
    def _vector_query(embedding: List[float], k: int):
        """Build the typed vector query the SDK expects for ``content_vector``."""
        from azure.search.documents.models import VectorizedQuery

        return VectorizedQuery(
            vector=embedding,
            k_nearest_neighbors=k,
            fields="content_vector"
        )

    async def _get_embedding(self, text: str) -> List[float]:
        """Return the embedding vector of ``text``."""
        response = await self.embedding_client.create_embeddings(
            input=text,
            model="text-embedding-ada-002"
        )
        return response.data[0].embedding

    def _filter_expression(self, filters: Optional[Dict]) -> Optional[str]:
        """Return the OData filter for ``filters`` or ``None`` if there is none."""
        if not filters:
            return None
        # An empty expression must not be sent as a filter.
        return self._build_filter(filters) or None

    @staticmethod
    def _odata_literal(value) -> str:
        """Render a Python scalar as an OData literal."""
        if value is None:
            return "null"
        # bool is checked before int because bool is a subclass of int.
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return repr(value)
        # OData escapes a single quote inside a string by doubling it.
        escaped = str(value).replace("'", "''")
        return f"'{escaped}'"

    @classmethod
    def _membership_clause(cls, field: str, values: list) -> str:
        """Build a "field is one of values" condition.

        Raises:
            ValueError: If ``values`` is empty.
        """
        if not values:
            raise ValueError(f"Filter on {field!r} has an empty list of values")

        delimiter = cls._IN_DELIMITER
        plain_strings = all(
            isinstance(v, str) and v and delimiter not in v for v in values
        )
        if plain_strings:
            joined = delimiter.join(values).replace("'", "''")
            return f"search.in({field}, '{joined}', '{delimiter}')"

        # search.in only handles strings; everything else is spelled out.
        alternatives = " or ".join(
            f"{field} eq {cls._odata_literal(v)}" for v in values
        )
        return f"({alternatives})"

    def _build_filter(self, filters: Dict) -> str:
        """Build an OData filter string; conditions are joined with ``and``.

        Each entry of ``filters`` maps a field name to one of:

        * a list or tuple - the field must equal one of the values;
        * a dict with any of ``gt``/``gte``/``lt``/``lte`` - range bounds,
          inserted verbatim so numeric and date-time literals both work;
        * any other value - equality against the value rendered as an OData
          literal (strings quoted and escaped, numbers and booleans bare).

        Field names and range bounds are not escaped and must come from
        trusted code.

        Raises:
            ValueError: If a list filter is empty.
        """
        conditions = []
        for field, value in filters.items():
            if isinstance(value, (list, tuple)):
                conditions.append(self._membership_clause(field, list(value)))
            elif isinstance(value, dict):
                for key, operator in self._RANGE_OPERATORS:
                    if key in value:
                        conditions.append(f"{field} {operator} {value[key]}")
            else:
                conditions.append(f"{field} eq {self._odata_literal(value)}")

        return " and ".join(conditions)

    def _parse_results(self, results) -> List[SearchResult]:
        """Convert raw hits (mappings) into ``SearchResult`` objects."""
        parsed = []
        for result in results:
            # "@search.highlights" and "metadata" may be present but None,
            # so ``or`` is used instead of a .get() default.
            highlights = result.get("@search.highlights") or {}
            parsed.append(SearchResult(
                document_id=result["id"],
                title=result.get("title", ""),
                content=result.get("content", ""),
                score=result["@search.score"],
                metadata=result.get("metadata") or {},
                highlights=highlights.get("content") or []
            ))
        return parsed

Query Understanding and Expansion

class QueryUnderstanding:
    """Understand and enhance search queries.

    Every method sends one prompt to ``llm_client.chat_completion`` and
    reads the reply from ``response.content``.
    """

    def __init__(self, llm_client):
        self.client = llm_client

    async def _ask(self, prompt: str, **options) -> str:
        """Send ``prompt`` as a single user message and return the reply text."""
        response = await self.client.chat_completion(
            model="gpt-35-turbo",
            messages=[{"role": "user", "content": prompt}],
            **options
        )
        return response.content

    @staticmethod
    def _parse_json(text: str):
        """Parse a JSON reply, tolerating a surrounding Markdown code fence.

        Raises:
            json.JSONDecodeError: If the remaining text is not valid JSON.
        """
        payload = text.strip()
        if payload.startswith("```"):
            # Drop the opening fence line ("```" or "```json") ...
            first_newline = payload.find("\n")
            payload = payload[first_newline + 1:] if first_newline != -1 else payload[3:]
            # ... and the closing fence, if there is one.
            payload = payload.rstrip()
            if payload.endswith("```"):
                payload = payload[:-3]
        return json.loads(payload)

    async def expand_query(
        self,
        query: str,
        domain: Optional[str] = None
    ) -> dict:
        """Expand query with synonyms and related terms.

        Args:
            query: The original search query.
            domain: Optional domain hint added to the prompt.

        Returns:
            The parsed JSON reply; the prompt asks for the keys ``original``,
            ``expanded_queries``, ``synonyms`` and ``related_concepts``.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON.
        """
        domain_line = f"Domain: {domain}" if domain else ""

        prompt = f"""Expand this search query with related terms.

Query: {query}
{domain_line}

Generate:
1. Synonyms for key terms
2. Related concepts
3. Alternative phrasings
4. Broader/narrower terms

Return as JSON:
{{
    "original": "...",
    "expanded_queries": ["...", "..."],
    "synonyms": {{"term": ["syn1", "syn2"]}},
    "related_concepts": ["..."]
}}"""

        return self._parse_json(await self._ask(prompt, temperature=0.3))

    async def extract_intent(self, query: str) -> dict:
        """Extract search intent from query.

        Returns:
            The parsed JSON reply; its exact keys are chosen by the model.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON.
        """
        prompt = f"""Analyze this search query to understand intent.

Query: {query}

Determine:
1. Primary intent (find_info/compare/how_to/troubleshoot/lookup)
2. Key entities mentioned
3. Temporal context (historical/current/future)
4. Specificity level (broad/specific)
5. Expected result type (document/answer/list)

Return as JSON."""

        return self._parse_json(await self._ask(prompt, temperature=0))

    async def rewrite_query(
        self,
        query: str,
        context: Optional[str] = None
    ) -> str:
        """Rewrite query for better search results.

        Args:
            query: The original search query.
            context: Optional context added to the prompt.

        Returns:
            The reply text with surrounding whitespace removed.
        """
        context_line = f"Context: {context}" if context else ""

        prompt = f"""Rewrite this search query for better results.

Original Query: {query}
{context_line}

Rules:
- Make the query more specific
- Add relevant technical terms
- Remove ambiguity
- Keep it concise

Return only the rewritten query."""

        return (await self._ask(prompt, temperature=0.2)).strip()

    async def generate_sub_queries(
        self,
        complex_query: str
    ) -> List[str]:
        """Break complex query into sub-queries.

        Returns:
            The parsed JSON reply; the prompt asks for an array of strings.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON.
        """
        prompt = f"""Break this complex search query into simpler sub-queries.

Query: {complex_query}

Generate 2-5 simpler queries that together address the original query.
Each sub-query should be independently searchable.

Return as JSON array of strings."""

        # No temperature is passed here; the client's default applies.
        return self._parse_json(await self._ask(prompt))

Result Reranking and Summarization

class SearchResultProcessor:
    """Process and enhance search results.

    Every method sends prompts to ``llm_client.chat_completion`` and reads
    the reply from ``response.content``. Results only need ``title``,
    ``content`` and ``document_id`` attributes.
    """

    def __init__(self, llm_client):
        self.client = llm_client

    @staticmethod
    def _parse_json(text: str):
        """Parse a JSON reply, tolerating a surrounding Markdown code fence.

        Raises:
            json.JSONDecodeError: If the remaining text is not valid JSON.
        """
        payload = text.strip()
        if payload.startswith("```"):
            first_newline = payload.find("\n")
            payload = payload[first_newline + 1:] if first_newline != -1 else payload[3:]
            payload = payload.rstrip()
            if payload.endswith("```"):
                payload = payload[:-3]
        return json.loads(payload)

    async def rerank_results(
        self,
        query: str,
        results: List["SearchResult"],
        top_k: int = 10
    ) -> List["SearchResult"]:
        """Rerank results using LLM for relevance.

        Returns ``results`` unchanged when there are at most ``top_k`` of
        them. Otherwise the model is asked for a ranking; indices that are
        not integers, are out of range, or repeat are ignored. If the reply
        cannot be used at all, the first ``top_k`` results are returned in
        their original order.
        """
        if len(results) <= top_k:
            return results

        # Only the first 500 characters of each document go into the prompt.
        results_text = "\n".join([
            f"[{i}] Title: {r.title}\nContent: {r.content[:500]}..."
            for i, r in enumerate(results)
        ])

        prompt = f"""Rank these search results by relevance to the query.

Query: {query}

Results:
{results_text}

Return the indices of the top {top_k} most relevant results in order of relevance.
Return as JSON array of indices: [0, 3, 1, ...]"""

        response = await self.client.chat_completion(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )

        fallback = results[:top_k]
        try:
            ranking = self._parse_json(response.content)
        except (AttributeError, TypeError, ValueError):
            # Missing or non-JSON reply: keep the index's own ordering.
            return fallback
        if not isinstance(ranking, list):
            return fallback

        reranked = []
        used = set()
        for index in ranking:
            # bool is a subclass of int, so it is excluded explicitly.
            if isinstance(index, bool) or not isinstance(index, int):
                continue
            # Negative values would silently index from the end of the list.
            if not 0 <= index < len(results) or index in used:
                continue
            used.add(index)
            reranked.append(results[index])
            if len(reranked) == top_k:
                break

        return reranked or fallback

    async def summarize_results(
        self,
        query: str,
        results: List["SearchResult"]
    ) -> str:
        """Generate summary answer from the first five search results."""
        context = "\n\n".join([
            f"Source: {r.title}\n{r.content}"
            for r in results[:5]
        ])

        prompt = f"""Based on these search results, provide a comprehensive answer.

Query: {query}

Search Results:
{context}

Provide:
1. Direct answer to the query
2. Key points from the sources
3. Note any conflicting information
4. Indicate confidence level

Format as clear, concise response."""

        response = await self.client.chat_completion(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )

        return response.content

    async def generate_follow_up_questions(
        self,
        query: str,
        results: List["SearchResult"]
    ) -> List[str]:
        """Generate follow-up questions from the titles of the top results.

        Returns:
            The parsed JSON reply; the prompt asks for an array.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON.
        """
        summary = "\n".join([r.title for r in results[:5]])

        prompt = f"""Based on this search, suggest follow-up questions.

Original Query: {query}
Top Results: {summary}

Generate 3-5 relevant follow-up questions the user might want to explore.
Return as JSON array."""

        response = await self.client.chat_completion(
            model="gpt-35-turbo",
            messages=[{"role": "user", "content": prompt}]
        )

        return self._parse_json(response.content)

    async def extract_answer_snippets(
        self,
        query: str,
        results: List["SearchResult"]
    ) -> List[dict]:
        """Extract specific answer snippets from the first five results.

        Returns:
            One dict with ``document_id``, ``title`` and ``snippet`` per
            document for which the model returned an answer.
        """
        snippets = []

        for result in results[:5]:
            prompt = f"""Extract the most relevant snippet that answers this query.

Query: {query}
Document: {result.content}

If the document contains a relevant answer, extract it.
If not, respond with "NO_ANSWER".

Return just the relevant snippet or "NO_ANSWER"."""

            response = await self.client.chat_completion(
                model="gpt-35-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0
            )

            snippet = response.content.strip()
            # Accept the sentinel with quotes or a trailing period as well.
            if not snippet or snippet.strip("\"'. ") == "NO_ANSWER":
                continue
            snippets.append({
                "document_id": result.document_id,
                "title": result.title,
                "snippet": snippet
            })

        return snippets

Document Indexing Pipeline

class SemanticIndexingPipeline:
    """Index documents for semantic search.

    ``config`` must provide ``embedding_client`` (used through
    ``create_embeddings``) and ``search_client`` (used through
    ``upload_documents``).
    """

    def __init__(self, config: dict):
        self.config = config
        self.embedding_client = config["embedding_client"]
        self.search_client = config["search_client"]

    async def index_document(
        self,
        document_id: str,
        title: str,
        content: str,
        metadata: Dict
    ):
        """Chunk, embed and upload a single document.

        Each chunk is uploaded as its own index document with the id
        ``"<document_id>_chunk_<n>"`` and ``parent_id`` set to
        ``document_id``.

        Returns:
            The number of chunk documents uploaded; 0 when ``content``
            yields no non-empty chunk, in which case nothing is uploaded.
        """
        chunks = self._chunk_content(content)
        if not chunks:
            return 0

        embeddings = await self._generate_embeddings(chunks)

        # One timezone-aware UTC timestamp shared by all chunks of the document.
        indexed_at = datetime.now(timezone.utc).isoformat()

        documents = [
            {
                "id": f"{document_id}_chunk_{i}",
                "parent_id": document_id,
                "title": title,
                "content": chunk,
                "content_vector": embedding,
                "chunk_index": i,
                "metadata": metadata,
                "indexed_at": indexed_at
            }
            for i, (chunk, embedding) in enumerate(zip(chunks, embeddings))
        ]

        self.search_client.upload_documents(documents)

        return len(documents)

    def _chunk_content(
        self,
        content: str,
        chunk_size: int = 1000,
        overlap: int = 200
    ) -> List[str]:
        """Split ``content`` into overlapping, whitespace-stripped chunks.

        A chunk is at most ``chunk_size`` characters long. Where possible it
        ends at the last paragraph break ("\\n\\n") in the window, otherwise
        at the last sentence break (". "). The next chunk starts ``overlap``
        characters before the end of the previous one. Empty chunks are
        dropped.

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``overlap`` is
                not in ``[0, chunk_size)``; such values could never advance.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= overlap < chunk_size:
            raise ValueError("overlap must be in the range [0, chunk_size)")
        if not content:
            return []

        chunks = []
        length = len(content)
        start = 0

        while start < length:
            end = min(start + chunk_size, length)

            if end < length:
                for separator in ("\n\n", ". "):
                    break_point = content.rfind(separator, start, end)
                    # A break is only usable if the next start (after stepping
                    # back by ``overlap``) still moves forward; otherwise the
                    # same window would be produced again, forever.
                    if break_point != -1 and break_point + 1 - overlap > start:
                        end = break_point + 1
                        break

            chunk = content[start:end].strip()
            if chunk:
                chunks.append(chunk)

            if end >= length:
                # The tail is covered; stepping back would only repeat it.
                break
            start = end - overlap

        return chunks

    async def _generate_embeddings(
        self,
        texts: List[str],
        batch_size: int = 16
    ) -> List[List[float]]:
        """Embed ``texts`` in batches of ``batch_size``, preserving order."""
        embeddings = []

        for offset in range(0, len(texts), batch_size):
            response = await self.embedding_client.create_embeddings(
                input=texts[offset:offset + batch_size],
                model="text-embedding-ada-002"
            )
            embeddings.extend(item.embedding for item in response.data)

        return embeddings

    async def reindex_collection(
        self,
        source_table: str,
        text_column: str,
        metadata_columns: List[str]
    ):
        """Reindex every row of a Spark table.

        The table must have an ``id`` column; a ``title`` column is used
        when present.

        Returns:
            The number of rows indexed.
        """
        from pyspark.sql import SparkSession

        spark = SparkSession.builder.getOrCreate()
        df = spark.table(source_table)

        total_docs = df.count()
        indexed = 0

        # Stream rows to the driver instead of collecting the whole table.
        for row in df.toLocalIterator():
            # Rows have no .get(); a plain dict does.
            record = row.asDict()
            await self.index_document(
                document_id=record["id"],
                title=record.get("title") or "",
                content=record[text_column],
                metadata={col: record[col] for col in metadata_columns}
            )
            indexed += 1

            if indexed % 100 == 0:
                print(f"Indexed {indexed}/{total_docs} documents")

        return indexed
class ConversationalSearch:
    """Multi-turn conversational search.

    ``search_engine`` must offer ``async search(query, top_k=...)`` and
    ``llm_client`` must offer ``async chat_completion(...)`` whose result has
    a ``content`` attribute. The conversation is kept in
    ``conversation_history`` as ``{"role", "content"}`` dicts.
    """

    def __init__(self, search_engine, llm_client):
        self.search = search_engine
        self.client = llm_client
        self.conversation_history = []

    async def chat_search(
        self,
        user_message: str
    ) -> dict:
        """Handle one conversational search turn.

        Returns:
            A dict with ``response`` (the answer text), ``search_query``
            (the query actually sent to the search engine) and ``sources``
            (title and id of every result).

        Raises:
            Whatever the search engine or LLM client raises; in that case
            the user message is removed from the history again.
        """
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })

        try:
            # Reformulate first: the history already contains this message.
            search_query = await self._reformulate_query(user_message)
            results = await self.search.search(search_query, top_k=5)
            response = await self._generate_response(user_message, results)
        except Exception:
            # Drop the unanswered message so a retry does not duplicate it.
            self.conversation_history.pop()
            raise

        self.conversation_history.append({
            "role": "assistant",
            "content": response
        })

        return {
            "response": response,
            "search_query": search_query,
            "sources": [{"title": r.title, "id": r.document_id} for r in results]
        }

    async def _reformulate_query(self, user_message: str) -> str:
        """Turn the latest message into a standalone search query.

        On the first turn the message is returned unchanged. Afterwards the
        last six history entries are sent to the model; if the model returns
        only whitespace, the original message is used instead.
        """
        if len(self.conversation_history) <= 1:
            return user_message

        history_text = "\n".join([
            f"{m['role']}: {m['content']}"
            for m in self.conversation_history[-6:]  # Last 3 turns
        ])

        prompt = f"""Given this conversation, reformulate the latest user message into a standalone search query.

Conversation:
{history_text}

Create a search query that captures what the user is looking for, including context from previous messages.
Return only the search query."""

        response = await self.client.chat_completion(
            model="gpt-35-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )

        # Never send an empty query to the search engine.
        return response.content.strip() or user_message

    async def _generate_response(
        self,
        user_message: str,
        results: List["SearchResult"]
    ) -> str:
        """Generate the conversational answer from the search results."""
        # Only the first 500 characters of each result go into the prompt.
        context = "\n\n".join([
            f"[{r.title}]: {r.content[:500]}"
            for r in results
        ])

        # The last entry is the current user message, which the prompt
        # shows separately, so it is left out of "Previous conversation".
        previous_turns = self.conversation_history[-4:][:-1]

        prompt = f"""You are a helpful search assistant. Answer based on the search results.

Previous conversation:
{json.dumps(previous_turns, indent=2)}

User's question: {user_message}

Search Results:
{context}

Provide a helpful, conversational response that:
1. Directly answers the question
2. References relevant sources
3. Suggests follow-up if appropriate"""

        response = await self.client.chat_completion(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )

        return response.content

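Enterprise semantic search transforms how organizations find and use knowledge. By understanding meaning rather than just matching keywords, these systems unlock insights hidden in document repositories.

## Takeaways

- Hybrid search, which combines a keyword query with a vector query, is the default here; pure keyword and pure vector search remain available when you need them.
- Query understanding (expansion, intent extraction, rewriting, and splitting into sub-queries) improves what is sent to the index.
- LLM-based reranking, summarization, and snippet extraction turn a list of hits into an answer with sources.
- Index documents as overlapping chunks, each with its own embedding, so long documents stay searchable.
- Next steps: start with hybrid search over a chunked index, then add reranking and conversational query reformulation once the basics work.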

Michael John Pena

Michael John Pena

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.