Hybrid Search Implementation: Combining Vector and Keyword Search

Pure vector search has limitations: it can miss exact matches and struggle with specific terminology such as error codes or product names. Hybrid search combines vector similarity with traditional keyword search for better results. Let's implement it.

Query: "Error code ERR_CONNECTION_REFUSED"

Vector Search Only:
- Finds documents about connection errors (semantic)
- May miss exact error code matches

Keyword Search Only:
- Finds exact "ERR_CONNECTION_REFUSED" matches
- Misses semantically related troubleshooting docs

Hybrid Search:
- Finds exact matches (keyword)
- PLUS semantically related docs (vector)
- Best of both worlds

Azure AI Search Hybrid Implementation

from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from azure.identity import DefaultAzureCredential

class AzureHybridSearch:
    def __init__(self, endpoint: str, index_name: str, embedding_model):
        self.client = SearchClient(
            endpoint=endpoint,
            index_name=index_name,
            credential=DefaultAzureCredential()
        )
        self.embedding_model = embedding_model

    def search(
        self,
        query: str,
        filter: str | None = None,
        top: int = 10,
        vector_weight: float = 0.5
    ) -> list[dict]:
        """Execute hybrid search.

        Azure gives the text portion of a hybrid query an implicit
        weight of 1.0; vector_weight scales the vector contribution
        relative to it, so there is no separate keyword weight to pass.
        """

        # Generate query embedding
        query_vector = self.embedding_model.embed(query)

        # Hybrid query: text + vector
        results = self.client.search(
            search_text=query,  # Keyword search
            vector_queries=[
                VectorizedQuery(
                    vector=query_vector,
                    k_nearest_neighbors=top * 2,  # Over-fetch for reranking
                    fields="content_vector",
                    weight=vector_weight  # Relative to the text score's implicit 1.0
                )
            ],
            filter=filter,
            query_type="semantic",  # Enable semantic ranking
            semantic_configuration_name="default",
            top=top,
            select=["id", "title", "content", "source"]
        )

        return [
            {
                "id": r["id"],
                "title": r["title"],
                "content": r["content"],
                "source": r["source"],
                "score": r["@search.score"],
                "reranker_score": r.get("@search.reranker_score")
            }
            for r in results
        ]

Custom Hybrid Search with Score Fusion

When using databases without built-in hybrid search:

import numpy as np
from dataclasses import dataclass
from typing import Callable

@dataclass
class SearchResult:
    id: str
    content: str
    score: float
    source: str  # 'vector' or 'keyword'

class HybridSearchEngine:
    def __init__(
        self,
        vector_search_fn: Callable,
        keyword_search_fn: Callable,
        fusion_method: str = "rrf"
    ):
        self.vector_search = vector_search_fn
        self.keyword_search = keyword_search_fn
        self.fusion_method = fusion_method

    def search(
        self,
        query: str,
        query_vector: list[float],
        top_k: int = 10,
        vector_weight: float = 0.5
    ) -> list[SearchResult]:
        """Execute hybrid search with score fusion."""

        # Get results from both sources
        vector_results = self.vector_search(query_vector, top_k=top_k * 2)
        keyword_results = self.keyword_search(query, top_k=top_k * 2)

        # Fuse results
        if self.fusion_method == "rrf":
            return self._reciprocal_rank_fusion(
                vector_results, keyword_results, top_k, vector_weight
            )
        elif self.fusion_method == "linear":
            return self._linear_combination(
                vector_results, keyword_results, top_k, vector_weight
            )
        else:
            raise ValueError(f"Unknown fusion method: {self.fusion_method}")

    def _reciprocal_rank_fusion(
        self,
        vector_results: list[SearchResult],
        keyword_results: list[SearchResult],
        top_k: int,
        vector_weight: float,
        k: int = 60  # RRF constant
    ) -> list[SearchResult]:
        """Reciprocal Rank Fusion - robust score combination."""

        scores = {}
        contents = {}

        # Score vector results
        for rank, result in enumerate(vector_results):
            rrf_score = vector_weight * (1 / (k + rank + 1))
            scores[result.id] = scores.get(result.id, 0) + rrf_score
            contents[result.id] = result.content

        # Score keyword results
        keyword_weight = 1 - vector_weight
        for rank, result in enumerate(keyword_results):
            rrf_score = keyword_weight * (1 / (k + rank + 1))
            scores[result.id] = scores.get(result.id, 0) + rrf_score
            contents[result.id] = result.content

        # Sort by combined score
        sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)

        return [
            SearchResult(
                id=doc_id,
                content=contents[doc_id],
                score=scores[doc_id],
                source="hybrid"
            )
            for doc_id in sorted_ids[:top_k]
        ]

    def _linear_combination(
        self,
        vector_results: list[SearchResult],
        keyword_results: list[SearchResult],
        top_k: int,
        vector_weight: float
    ) -> list[SearchResult]:
        """Linear combination of normalized scores."""

        # Normalize scores
        def normalize(results):
            if not results:
                return results
            max_score = max(r.score for r in results)
            min_score = min(r.score for r in results)
            if max_score == min_score:
                return [(r.id, r.content, 1.0) for r in results]
            return [
                (r.id, r.content, (r.score - min_score) / (max_score - min_score))
                for r in results
            ]

        vector_normalized = normalize(vector_results)
        keyword_normalized = normalize(keyword_results)

        scores = {}
        contents = {}

        for doc_id, content, score in vector_normalized:
            scores[doc_id] = vector_weight * score
            contents[doc_id] = content

        keyword_weight = 1 - vector_weight
        for doc_id, content, score in keyword_normalized:
            scores[doc_id] = scores.get(doc_id, 0) + keyword_weight * score
            contents[doc_id] = content

        sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)

        return [
            SearchResult(id=doc_id, content=contents[doc_id], score=scores[doc_id], source="hybrid")
            for doc_id in sorted_ids[:top_k]
        ]
PostgreSQL Hybrid Search with pgvector

PostgreSQL can run both halves in a single query: pgvector handles vector similarity, the built-in full-text search handles keywords, and the RRF fusion happens in SQL:

import numpy as np
import psycopg2
from pgvector.psycopg2 import register_vector

class PostgresHybridSearch:
    def __init__(self, conn_string: str):
        self.conn = psycopg2.connect(conn_string)
        register_vector(self.conn)

    def search(
        self,
        query: str,
        query_vector: list[float],
        top_k: int = 10,
        vector_weight: float = 0.5
    ) -> list[dict]:
        """Hybrid search using PostgreSQL full-text + pgvector."""

        keyword_weight = 1 - vector_weight

        # pgvector's psycopg2 adapter serialises numpy arrays, not plain lists
        vec = np.asarray(query_vector)

        with self.conn.cursor() as cur:
            # Combined query using RRF-style fusion
            cur.execute("""
                WITH vector_search AS (
                    SELECT
                        id,
                        title,
                        content,
                        ROW_NUMBER() OVER (ORDER BY embedding <=> %s) as vector_rank
                    FROM documents
                    ORDER BY embedding <=> %s
                    LIMIT %s
                ),
                keyword_search AS (
                    SELECT
                        id,
                        title,
                        content,
                        ROW_NUMBER() OVER (ORDER BY ts_rank(to_tsvector('english', content), plainto_tsquery('english', %s)) DESC) as keyword_rank
                    FROM documents
                    WHERE to_tsvector('english', content) @@ plainto_tsquery('english', %s)
                    LIMIT %s
                ),
                combined AS (
                    SELECT
                        COALESCE(v.id, k.id) as id,
                        COALESCE(v.title, k.title) as title,
                        COALESCE(v.content, k.content) as content,
                        COALESCE(%s * (1.0 / (60 + v.vector_rank)), 0) +
                        COALESCE(%s * (1.0 / (60 + k.keyword_rank)), 0) as rrf_score
                    FROM vector_search v
                    FULL OUTER JOIN keyword_search k ON v.id = k.id
                )
                SELECT id, title, content, rrf_score
                FROM combined
                ORDER BY rrf_score DESC
                LIMIT %s
            """, (
                vec, vec, top_k * 2,
                query, query, top_k * 2,
                vector_weight, keyword_weight,
                top_k
            ))

            results = cur.fetchall()

        return [
            {"id": r[0], "title": r[1], "content": r[2], "score": r[3]}
            for r in results
        ]
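Neither half of that query is fast without indexes. A sketch of the supporting DDL, assuming pgvector 0.5+ for HNSW (use ivfflat on older versions) and the documents table layout from the example above:

# One-time setup - names match the documents table used above
setup_sql = """
CREATE INDEX IF NOT EXISTS documents_embedding_idx
    ON documents USING hnsw (embedding vector_cosine_ops);

CREATE INDEX IF NOT EXISTS documents_content_fts_idx
    ON documents USING gin (to_tsvector('english', content));
"""

conn = psycopg2.connect(conn_string)
with conn, conn.cursor() as cur:  # commits on exit
    cur.execute(setup_sql)

vector_cosine_ops matches the <=> operator used in the query, and the GIN index must be built on the same to_tsvector('english', content) expression the WHERE clause uses, or the planner will not use it.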

Adaptive Weight Selection

Adjust weights based on query characteristics:

import json

class AdaptiveHybridSearch:
    def __init__(self, hybrid_engine: HybridSearchEngine, llm_client):
        self.engine = hybrid_engine
        self.llm = llm_client

    async def search(self, query: str, query_vector: list[float], top_k: int = 10) -> dict:
        """Adaptively adjust weights based on query type."""

        # Analyze query to determine optimal weights
        weights = await self._analyze_query(query)

        results = self.engine.search(
            query=query,
            query_vector=query_vector,
            top_k=top_k,
            vector_weight=weights["vector"]
        )

        return {
            "results": results,
            "weights_used": weights,
            "query_analysis": weights.get("reasoning")
        }

    async def _analyze_query(self, query: str) -> dict:
        """Use LLM to determine optimal search weights."""

        # Assumes an AsyncOpenAI-style client; adapt the call to your SDK
        response = await self.llm.chat.completions.create(
            model="gpt-4o-mini",
            response_format={"type": "json_object"},  # forces valid JSON output
            messages=[{
                "role": "user",
                "content": f"""Analyze this search query and determine optimal weights for hybrid search:

                Query: {query}

                Consider:
                - Does it contain specific terms (error codes, product names)?
                - Is it a conceptual/semantic query?
                - Does it mix both?

                Return JSON:
                {{
                    "vector": 0.0-1.0,  // Weight for semantic/vector search
                    "keyword": 0.0-1.0, // Weight for keyword search (must sum to 1)
                    "reasoning": "brief explanation"
                }}"""
            }]
        )

        weights = json.loads(response.choices[0].message.content)

        # Ensure weights are valid and sum to 1
        total = weights["vector"] + weights["keyword"]
        if total <= 0:
            weights["vector"] = weights["keyword"] = 0.5  # fall back to an even split
        else:
            weights["vector"] /= total
            weights["keyword"] /= total

        return weights

# Example adaptive behavior:
# "ERR_CONNECTION_REFUSED troubleshooting" -> keyword: 0.7, vector: 0.3
# "how to fix network connectivity issues" -> keyword: 0.3, vector: 0.7
# "API rate limiting best practices" -> keyword: 0.5, vector: 0.5
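An LLM call per query adds latency and cost. A cheap rule-based fallback is one alternative: detect exact-match signals (all-caps identifiers, snake_case tokens, version strings) with a regex and shift weight toward keywords. This heuristic and its thresholds are illustrative, not tuned:

import re

# Hypothetical heuristic: ALL_CAPS identifiers, snake_case tokens, and
# version strings usually signal exact-match intent
EXACT_MATCH_PATTERN = re.compile(r"\b[A-Z][A-Z0-9_]{3,}\b|\b\w+_\w+\b|\bv?\d+\.\d+(\.\d+)?\b")

def heuristic_weights(query: str) -> dict:
    """Rule-based stand-in for the LLM analysis above."""
    if EXACT_MATCH_PATTERN.search(query):
        return {"vector": 0.3, "keyword": 0.7, "reasoning": "exact-match tokens found"}
    return {"vector": 0.7, "keyword": 0.3, "reasoning": "conceptual query"}

print(heuristic_weights("ERR_CONNECTION_REFUSED troubleshooting"))  # keyword-heavy
print(heuristic_weights("how to fix network connectivity issues"))  # vector-heavy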

Evaluation

from typing import Callable

import numpy as np

class HybridSearchEvaluator:
    def evaluate(
        self,
        queries: list[dict],  # {"query": str, "relevant_ids": list[str]}
        search_fn: Callable,
        k_values: list[int] = [1, 5, 10]
    ) -> dict:
        """Evaluate hybrid search performance."""

        metrics = {f"recall@{k}": [] for k in k_values}
        metrics.update({f"precision@{k}": [] for k in k_values})
        metrics["mrr"] = []

        for item in queries:
            query = item["query"]
            relevant = set(item["relevant_ids"])

            results = search_fn(query)
            result_ids = [r["id"] for r in results]

            # Calculate metrics
            for k in k_values:
                top_k_ids = set(result_ids[:k])
                hits = len(top_k_ids & relevant)

                metrics[f"recall@{k}"].append(hits / len(relevant))
                metrics[f"precision@{k}"].append(hits / k)

            # MRR
            mrr = 0
            for i, rid in enumerate(result_ids):
                if rid in relevant:
                    mrr = 1 / (i + 1)
                    break
            metrics["mrr"].append(mrr)

        # Average metrics
        return {k: np.mean(v) for k, v in metrics.items()}
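To pick weights empirically, run the same labelled queries through a few configurations and compare. The sketch below reuses the Azure searcher defined earlier; the two queries and their relevant_ids are made-up placeholders, and a real evaluation set needs dozens of labelled queries:

# Hypothetical relevance judgments - replace with real labelled data
eval_queries = [
    {"query": "ERR_CONNECTION_REFUSED", "relevant_ids": ["doc-2", "doc-7"]},
    {"query": "network connectivity issues", "relevant_ids": ["doc-2", "doc-5"]},
]

evaluator = HybridSearchEvaluator()

# Sweep vector_weight and compare recall/precision/MRR
for weight in (0.3, 0.5, 0.7):
    metrics = evaluator.evaluate(
        eval_queries,
        search_fn=lambda q, w=weight: searcher.search(q, vector_weight=w),
    )
    print(f"vector_weight={weight}: {metrics}")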

Hybrid search delivers better results than either approach alone, especially on workloads that mix exact identifiers with conceptual questions. Implement it early in your RAG pipeline and tune the weights against a labelled query set for your specific use case.

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.