Hybrid Search Implementation: Combining Vector and Keyword Search

Pure vector search has limitations: it can miss exact matches and struggle with specific terminology such as error codes or product names. Hybrid search combines vector similarity with traditional keyword search for better results. Let's implement it.

Query: "Error code ERR_CONNECTION_REFUSED"

Vector Search Only:
- Finds documents about connection errors (semantic)
- May miss exact error code matches

Keyword Search Only:
- Finds exact "ERR_CONNECTION_REFUSED" matches
- Misses semantically related troubleshooting docs

Hybrid Search:
- Finds exact matches (keyword)
- PLUS semantically related docs (vector)
- Best of both worlds

Azure AI Search Hybrid Implementation

from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery
from azure.identity import DefaultAzureCredential

class AzureHybridSearch:
    def __init__(self, endpoint: str, index_name: str, embedding_model):
        self.client = SearchClient(
            endpoint=endpoint,
            index_name=index_name,
            credential=DefaultAzureCredential()
        )
        self.embedding_model = embedding_model

    def search(
        self,
        query: str,
        filter: str | None = None,
        top: int = 10,
        vector_weight: float = 0.5
    ) -> list[dict]:
        """Execute hybrid search.

        Azure gives the text portion of a hybrid query an implicit
        weight of 1.0; vector_weight scales the vector contribution
        relative to it, so there is no separate keyword weight to pass.
        """

        # Generate query embedding
        query_vector = self.embedding_model.embed(query)

        # Hybrid query: text + vector
        results = self.client.search(
            search_text=query,  # Keyword search
            vector_queries=[
                VectorizedQuery(
                    vector=query_vector,
                    k_nearest_neighbors=top * 2,  # Over-fetch for reranking
                    fields="content_vector",
                    weight=vector_weight  # Relative to the text score's implicit 1.0
                )
            ],
            filter=filter,
            query_type="semantic",  # Enable semantic ranking
            semantic_configuration_name="default",
            top=top,
            select=["id", "title", "content", "source"]
        )

        return [
            {
                "id": r["id"],
                "title": r["title"],
                "content": r["content"],
                "source": r["source"],
                "score": r["@search.score"],
                "reranker_score": r.get("@search.reranker_score")
            }
            for r in results
        ]

Custom Hybrid Search with Score Fusion

When using databases without built-in hybrid search:

import numpy as np
from dataclasses import dataclass
from typing import Callable

@dataclass
class SearchResult:
    id: str
    content: str
    score: float
    source: str  # 'vector' or 'keyword'

class HybridSearchEngine:
    def __init__(
        self,
        vector_search_fn: Callable,
        keyword_search_fn: Callable,
        fusion_method: str = "rrf"
    ):
        self.vector_search = vector_search_fn
        self.keyword_search = keyword_search_fn
        self.fusion_method = fusion_method

    def search(
        self,
        query: str,
        query_vector: list[float],
        top_k: int = 10,
        vector_weight: float = 0.5
    ) -> list[SearchResult]:
        """Execute hybrid search with score fusion."""

        # Get results from both sources
        vector_results = self.vector_search(query_vector, top_k=top_k * 2)
        keyword_results = self.keyword_search(query, top_k=top_k * 2)

        # Fuse results
        if self.fusion_method == "rrf":
            return self._reciprocal_rank_fusion(
                vector_results, keyword_results, top_k, vector_weight
            )
        elif self.fusion_method == "linear":
            return self._linear_combination(
                vector_results, keyword_results, top_k, vector_weight
            )
        else:
            raise ValueError(f"Unknown fusion method: {self.fusion_method}")

    def _reciprocal_rank_fusion(
        self,
        vector_results: list[SearchResult],
        keyword_results: list[SearchResult],
        top_k: int,
        vector_weight: float,
        k: int = 60  # RRF constant
    ) -> list[SearchResult]:
        """Reciprocal Rank Fusion - robust score combination."""

        scores = {}
        contents = {}

        # Score vector results
        for rank, result in enumerate(vector_results):
            rrf_score = vector_weight * (1 / (k + rank + 1))
            scores[result.id] = scores.get(result.id, 0) + rrf_score
            contents[result.id] = result.content

        # Score keyword results
        keyword_weight = 1 - vector_weight
        for rank, result in enumerate(keyword_results):
            rrf_score = keyword_weight * (1 / (k + rank + 1))
            scores[result.id] = scores.get(result.id, 0) + rrf_score
            contents[result.id] = result.content

        # Sort by combined score
        sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)

        return [
            SearchResult(
                id=doc_id,
                content=contents[doc_id],
                score=scores[doc_id],
                source="hybrid"
            )
            for doc_id in sorted_ids[:top_k]
        ]

    def _linear_combination(
        self,
        vector_results: list[SearchResult],
        keyword_results: list[SearchResult],
        top_k: int,
        vector_weight: float
    ) -> list[SearchResult]:
        """Linear combination of normalized scores."""

        # Normalize scores
        def normalize(results):
            if not results:
                return results
            max_score = max(r.score for r in results)
            min_score = min(r.score for r in results)
            if max_score == min_score:
                return [(r.id, r.content, 1.0) for r in results]
            return [
                (r.id, r.content, (r.score - min_score) / (max_score - min_score))
                for r in results
            ]

        vector_normalized = normalize(vector_results)
        keyword_normalized = normalize(keyword_results)

        scores = {}
        contents = {}

        for doc_id, content, score in vector_normalized:
            scores[doc_id] = vector_weight * score
            contents[doc_id] = content

        keyword_weight = 1 - vector_weight
        for doc_id, content, score in keyword_normalized:
            scores[doc_id] = scores.get(doc_id, 0) + keyword_weight * score
            contents[doc_id] = content

        sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)

        return [
            SearchResult(id=doc_id, content=contents[doc_id], score=scores[doc_id], source="hybrid")
            for doc_id in sorted_ids[:top_k]
        ]
PostgreSQL Hybrid Search with pgvector

PostgreSQL can run both halves in a single query: pgvector handles vector similarity, the built-in full-text search handles keywords, and the RRF fusion happens in SQL:

import numpy as np
import psycopg2
from pgvector.psycopg2 import register_vector

class PostgresHybridSearch:
    def __init__(self, conn_string: str):
        self.conn = psycopg2.connect(conn_string)
        register_vector(self.conn)

    def search(
        self,
        query: str,
        query_vector: list[float],
        top_k: int = 10,
        vector_weight: float = 0.5
    ) -> list[dict]:
        """Hybrid search using PostgreSQL full-text + pgvector."""

        keyword_weight = 1 - vector_weight

        # pgvector's psycopg2 adapter serialises numpy arrays, not plain lists
        vec = np.asarray(query_vector)

        with self.conn.cursor() as cur:
            # Combined query using RRF-style fusion
            cur.execute("""
                WITH vector_search AS (
                    SELECT
                        id,
                        title,
                        content,
                        ROW_NUMBER() OVER (ORDER BY embedding <=> %s) as vector_rank
                    FROM documents
                    ORDER BY embedding <=> %s
                    LIMIT %s
                ),
                keyword_search AS (
                    SELECT
                        id,
                        title,
                        content,
                        ROW_NUMBER() OVER (ORDER BY ts_rank(to_tsvector('english', content), plainto_tsquery('english', %s)) DESC) as keyword_rank
                    FROM documents
                    WHERE to_tsvector('english', content) @@ plainto_tsquery('english', %s)
                    LIMIT %s
                ),
                combined AS (
                    SELECT
                        COALESCE(v.id, k.id) as id,
                        COALESCE(v.title, k.title) as title,
                        COALESCE(v.content, k.content) as content,
                        COALESCE(%s * (1.0 / (60 + v.vector_rank)), 0) +
                        COALESCE(%s * (1.0 / (60 + k.keyword_rank)), 0) as rrf_score
                    FROM vector_search v
                    FULL OUTER JOIN keyword_search k ON v.id = k.id
                )
                SELECT id, title, content, rrf_score
                FROM combined
                ORDER BY rrf_score DESC
                LIMIT %s
            """, (
                vec, vec, top_k * 2,
                query, query, top_k * 2,
                vector_weight, keyword_weight,
                top_k
            ))

            results = cur.fetchall()

        return [
            {"id": r[0], "title": r[1], "content": r[2], "score": r[3]}
            for r in results
        ]
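Neither half of that query is fast without indexes. A sketch of the supporting DDL, assuming pgvector 0.5+ for HNSW (use ivfflat on older versions) and the documents table layout from the example above:

# One-time setup - names match the documents table used above
setup_sql = """
CREATE INDEX IF NOT EXISTS documents_embedding_idx
    ON documents USING hnsw (embedding vector_cosine_ops);

CREATE INDEX IF NOT EXISTS documents_content_fts_idx
    ON documents USING gin (to_tsvector('english', content));
"""

conn = psycopg2.connect(conn_string)
with conn, conn.cursor() as cur:  # commits on exit
    cur.execute(setup_sql)

vector_cosine_ops matches the <=> operator used in the query, and the GIN index must be built on the same to_tsvector('english', content) expression the WHERE clause uses, or the planner will not use it.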

Adaptive Weight Selection

Adjust weights based on query characteristics:

import json

class AdaptiveHybridSearch:
    def __init__(self, hybrid_engine: HybridSearchEngine, llm_client):
        self.engine = hybrid_engine
        self.llm = llm_client

    async def search(self, query: str, query_vector: list[float], top_k: int = 10) -> dict:
        """Adaptively adjust weights based on query type."""

        # Analyze query to determine optimal weights
        weights = await self._analyze_query(query)

        results = self.engine.search(
            query=query,
            query_vector=query_vector,
            top_k=top_k,
            vector_weight=weights["vector"]
        )

        return {
            "results": results,
            "weights_used": weights,
            "query_analysis": weights.get("reasoning")
        }

    async def _analyze_query(self, query: str) -> dict:
        """Use LLM to determine optimal search weights."""

        # Assumes an AsyncOpenAI-style client; adapt the call to your SDK
        response = await self.llm.chat.completions.create(
            model="gpt-4o-mini",
            response_format={"type": "json_object"},  # forces valid JSON output
            messages=[{
                "role": "user",
                "content": f"""Analyze this search query and determine optimal weights for hybrid search:

                Query: {query}

                Consider:
                - Does it contain specific terms (error codes, product names)?
                - Is it a conceptual/semantic query?
                - Does it mix both?

                Return JSON:
                {{
                    "vector": 0.0-1.0,  // Weight for semantic/vector search
                    "keyword": 0.0-1.0, // Weight for keyword search (must sum to 1)
                    "reasoning": "brief explanation"
                }}"""
            }]
        )

        weights = json.loads(response.choices[0].message.content)

        # Ensure weights are valid and sum to 1
        total = weights["vector"] + weights["keyword"]
        if total <= 0:
            weights["vector"] = weights["keyword"] = 0.5  # fall back to an even split
        else:
            weights["vector"] /= total
            weights["keyword"] /= total

        return weights

# Example adaptive behavior:
# "ERR_CONNECTION_REFUSED troubleshooting" -> keyword: 0.7, vector: 0.3
# "how to fix network connectivity issues" -> keyword: 0.3, vector: 0.7
# "API rate limiting best practices" -> keyword: 0.5, vector: 0.5
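An LLM call per query adds latency and cost. A cheap rule-based fallback is one alternative: detect exact-match signals (all-caps identifiers, snake_case tokens, version strings) with a regex and shift weight toward keywords. This heuristic and its thresholds are illustrative, not tuned:

import re

# Hypothetical heuristic: ALL_CAPS identifiers, snake_case tokens, and
# version strings usually signal exact-match intent
EXACT_MATCH_PATTERN = re.compile(r"\b[A-Z][A-Z0-9_]{3,}\b|\b\w+_\w+\b|\bv?\d+\.\d+(\.\d+)?\b")

def heuristic_weights(query: str) -> dict:
    """Rule-based stand-in for the LLM analysis above."""
    if EXACT_MATCH_PATTERN.search(query):
        return {"vector": 0.3, "keyword": 0.7, "reasoning": "exact-match tokens found"}
    return {"vector": 0.7, "keyword": 0.3, "reasoning": "conceptual query"}

print(heuristic_weights("ERR_CONNECTION_REFUSED troubleshooting"))  # keyword-heavy
print(heuristic_weights("how to fix network connectivity issues"))  # vector-heavy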

Evaluation

from typing import Callable

import numpy as np

class HybridSearchEvaluator:
    def evaluate(
        self,
        queries: list[dict],  # {"query": str, "relevant_ids": list[str]}
        search_fn: Callable,
        k_values: list[int] = [1, 5, 10]
    ) -> dict:
        """Evaluate hybrid search performance."""

        metrics = {f"recall@{k}": [] for k in k_values}
        metrics.update({f"precision@{k}": [] for k in k_values})
        metrics["mrr"] = []

        for item in queries:
            query = item["query"]
            relevant = set(item["relevant_ids"])

            results = search_fn(query)
            result_ids = [r["id"] for r in results]

            # Calculate metrics
            for k in k_values:
                top_k_ids = set(result_ids[:k])
                hits = len(top_k_ids & relevant)

                metrics[f"recall@{k}"].append(hits / len(relevant))
                metrics[f"precision@{k}"].append(hits / k)

            # MRR
            mrr = 0
            for i, rid in enumerate(result_ids):
                if rid in relevant:
                    mrr = 1 / (i + 1)
                    break
            metrics["mrr"].append(mrr)

        # Average metrics
        return {k: np.mean(v) for k, v in metrics.items()}
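To pick weights empirically, run the same labelled queries through a few configurations and compare. The sketch below reuses the Azure searcher defined earlier; the two queries and their relevant_ids are made-up placeholders, and a real evaluation set needs dozens of labelled queries:

# Hypothetical relevance judgments - replace with real labelled data
eval_queries = [
    {"query": "ERR_CONNECTION_REFUSED", "relevant_ids": ["doc-2", "doc-7"]},
    {"query": "network connectivity issues", "relevant_ids": ["doc-2", "doc-5"]},
]

evaluator = HybridSearchEvaluator()

# Sweep vector_weight and compare recall/precision/MRR
for weight in (0.3, 0.5, 0.7):
    metrics = evaluator.evaluate(
        eval_queries,
        search_fn=lambda q, w=weight: searcher.search(q, vector_weight=w),
    )
    print(f"vector_weight={weight}: {metrics}")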

Hybrid search delivers better results than either approach alone, especially on workloads that mix exact identifiers with conceptual questions. Implement it early in your RAG pipeline and tune the weights against a labelled query set for your specific use case.

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.