January 27, 2024 1 min read

Hybrid Search Optimization: Combining Vector, Keyword, and Semantic Search

Tags: Hybrid Search, Azure AI Search, Vector Search, RAG, Search Optimization

Hybrid search combines multiple retrieval methods to overcome individual limitations. Understanding how to optimize the combination is key to building effective RAG systems.

Why Hybrid Search?

Each search method has strengths and weaknesses:

Method	Strengths	Weaknesses
Keyword (BM25)	Exact matches, acronyms	No semantic understanding
Vector	Semantic similarity	Misses exact terms
Semantic	Intent understanding	Adds reranking overhead

Basic Hybrid Configuration

from azure.search.documents.models import VectorizedQuery, VectorQuery

def hybrid_search(query: str, query_vector: list, top_k: int = 10):
    """Run one combined keyword + vector query and return the hits as a list.

    Args:
        query: Text sent as ``search_text`` (the keyword leg).
        query_vector: Embedding sent as the vector leg against the
            ``content_vector`` field.
        top_k: Number of results requested from the service.

    Returns:
        The search results materialised into a ``list``.
    """
    # The vector leg asks for twice as many neighbours as the final top_k.
    neighbour_count = top_k * 2
    vq = VectorizedQuery(
        fields="content_vector",
        k_nearest_neighbors=neighbour_count,
        vector=query_vector,
    )

    hits = search_client.search(
        search_text=query,
        vector_queries=[vq],
        top=top_k,
    )
    return list(hits)

Advanced: Three-Way Hybrid

def advanced_hybrid_search(query: str, query_vector: list):
    """Run a keyword + vector query with semantic ranking enabled.

    The request uses ``query_type="semantic"`` with the
    ``my-semantic-config`` configuration and extractive captions, and asks
    for the top 10 results with 50 nearest neighbours on the vector leg.

    Args:
        query: Text sent as ``search_text``.
        query_vector: Embedding matched against ``content_vector``.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``score``,
        ``reranker_score`` and ``captions``. The last two are read with
        ``.get`` and are ``None`` when the result does not carry them.
    """
    vq = VectorizedQuery(
        fields="content_vector",
        k_nearest_neighbors=50,
        vector=query_vector,
    )

    hits = search_client.search(
        search_text=query,
        vector_queries=[vq],
        query_type="semantic",
        semantic_configuration_name="my-semantic-config",
        query_caption="extractive",
        top=10,
    )

    # Project each hit down to the fields of interest.
    rows = []
    for hit in hits:
        rows.append(
            {
                "id": hit["id"],
                "title": hit["title"],
                "score": hit["@search.score"],
                "reranker_score": hit.get("@search.reranker_score"),
                "captions": hit.get("@search.captions"),
            }
        )
    return rows

Score Fusion Strategies

Reciprocal Rank Fusion (RRF)

def reciprocal_rank_fusion(result_lists: list[list], k: int = 60):
    """Fuse several ranked result lists with Reciprocal Rank Fusion.

    Each document contributes ``1 / (k + position)`` for every list it
    appears in, where ``position`` is its 1-based rank in that list.

    Args:
        result_lists: Ranked lists of documents; each document must be
            subscriptable with an ``"id"`` key.
        k: Smoothing constant added to the rank in the denominator.

    Returns:
        One document per distinct id (the first object seen for that id),
        ordered by descending fused score. Ties keep first-seen order.
    """
    fused = {}       # doc id -> accumulated RRF score
    first_seen = {}  # doc id -> first document object encountered

    for ranking in result_lists:
        for position, item in enumerate(ranking, start=1):
            key = item["id"]
            first_seen.setdefault(key, item)
            fused[key] = fused.get(key, 0) + 1 / (k + position)

    # sorted() is stable, so equal scores stay in insertion order.
    ordered_ids = sorted(fused, key=fused.get, reverse=True)
    return [first_seen[key] for key in ordered_ids]

# Usage: run the keyword and vector retrievers separately, then fuse their
# ranked lists. `query`, `query_vector`, `keyword_search` and `vector_search`
# are not defined in this snippet and must be supplied by the surrounding code.
keyword_results = keyword_search(query)
vector_results = vector_search(query_vector)
fused_results = reciprocal_rank_fusion([keyword_results, vector_results])

Weighted Combination

def weighted_hybrid(
    query: str,
    query_vector: list,
    keyword_weight: float = 0.3,
    vector_weight: float = 0.7
):
    """Weighted combination of keyword and vector scores.

    Runs a keyword-only search and a vector-only search, merges the hits by
    document id, and ranks them by
    ``keyword_weight * normalized_keyword + vector_weight * normalized_vector``.
    A document found by only one of the searches gets a raw score of 0 for
    the other.

    Args:
        query: Text for the keyword search.
        query_vector: Embedding for the vector search on ``content_vector``.
        keyword_weight: Multiplier applied to the normalized keyword score.
        vector_weight: Multiplier applied to the normalized vector score.

    Returns:
        A list of dicts with the keys ``doc``, ``keyword_score``,
        ``vector_score`` and ``final_score``, sorted by ``final_score``
        in descending order.
    """
    # Separate searches, each capped at 50 candidates.
    keyword_results = search_client.search(search_text=query, top=50)
    vector_results = search_client.search(
        vector_queries=[
            VectorizedQuery(
                vector=query_vector,
                # The neighbour-count keyword is `k_nearest_neighbors` (as in
                # the other snippets in this file); `k` is not a
                # VectorizedQuery parameter.
                k_nearest_neighbors=50,
                fields="content_vector",
            )
        ],
        top=50,
    )

    # Merge both result sets by document id.
    scores = {}

    for r in keyword_results:
        scores[r["id"]] = {"doc": r, "keyword_score": r["@search.score"], "vector_score": 0}

    for r in vector_results:
        if r["id"] in scores:
            scores[r["id"]]["vector_score"] = r["@search.score"]
        else:
            scores[r["id"]] = {"doc": r, "keyword_score": 0, "vector_score": r["@search.score"]}

    # NOTE(review): normalize_score is not defined in this snippet. It is
    # presumably expected to map raw keyword and vector scores onto a
    # comparable scale before weighting -- confirm its implementation.
    for entry in scores.values():
        ks = normalize_score(entry["keyword_score"])
        vs = normalize_score(entry["vector_score"])
        entry["final_score"] = keyword_weight * ks + vector_weight * vs

    return sorted(scores.values(), key=lambda x: x["final_score"], reverse=True)

Optimization Tips

Query Analysis

def analyze_query_for_search(query: str) -> dict:
    """Analyze query to optimize search strategy.

    Args:
        query: The raw user query.

    Returns:
        A dict with:
          - ``has_exact_terms``: the query contains a non-empty
            double-quoted phrase.
          - ``is_question``: the stripped query ends with ``?``.
          - ``has_technical_terms``: the query contains a standalone run of
            two or more uppercase ASCII letters (e.g. an acronym).
          - ``word_count``: number of whitespace-separated tokens.
          - ``recommended_keyword_weight``: 0.6 for quoted phrases, else
            0.3 for questions, else 0.4.
    """
    # Imported here because this file has no top-level `import re`; without
    # it the function raises NameError on first call.
    import re

    analysis = {
        "has_exact_terms": bool(re.search(r'"[^"]+"', query)),
        "is_question": query.strip().endswith("?"),
        "has_technical_terms": bool(re.search(r'\b[A-Z]{2,}\b', query)),
        "word_count": len(query.split()),
    }

    # Recommend weights: quoted phrases take priority over the question check.
    if analysis["has_exact_terms"]:
        analysis["recommended_keyword_weight"] = 0.6
    elif analysis["is_question"]:
        analysis["recommended_keyword_weight"] = 0.3
    else:
        analysis["recommended_keyword_weight"] = 0.4

    return analysis

Dynamic Weight Adjustment

def adaptive_hybrid_search(query: str, query_vector: list):
    """Pick keyword/vector weights from the query itself, then search.

    The keyword weight is the ``recommended_keyword_weight`` reported by
    ``analyze_query_for_search``; the vector weight is its complement to 1.

    Args:
        query: The raw user query.
        query_vector: Embedding of the query.

    Returns:
        Whatever ``weighted_hybrid`` returns for the chosen weights.
    """
    kw = analyze_query_for_search(query)["recommended_keyword_weight"]
    return weighted_hybrid(query, query_vector, kw, 1 - kw)

Evaluation

def evaluate_hybrid_configs(test_queries: list, configs: list[dict], search_fn=None):
    """Evaluate different hybrid configurations.

    For every configuration, each test query is run through ``search_fn``
    and precision, recall and reciprocal rank are computed over the first
    10 results, then averaged per configuration.

    Args:
        test_queries: Dicts with the keys ``query``, ``vector`` and
            ``relevant_docs`` (ids of the documents considered relevant).
        configs: Dicts with the keys ``name``, ``keyword_weight`` and
            ``vector_weight``.
        search_fn: Callable invoked as
            ``search_fn(query, vector, keyword_weight=..., vector_weight=...)``.
            Defaults to ``weighted_hybrid``, the function in this file that
            accepts both weights. Each returned item may be either a
            document with an ``"id"`` key or a wrapper dict holding the
            document under ``"doc"`` (the shape ``weighted_hybrid`` returns).

    Returns:
        ``{config_name: {"avg_precision", "avg_recall", "avg_mrr"}}``.
        A metric is 0.0 when it has no data (no results, no relevant
        documents, or no test queries) instead of dividing by zero.
    """
    if search_fn is None:
        # hybrid_search does not accept weights; weighted_hybrid does.
        search_fn = weighted_hybrid

    def _doc_id(item):
        # Unwrap {"doc": ...} entries; otherwise treat the item as the doc.
        doc = item["doc"] if "doc" in item else item
        return doc["id"]

    def _mean(values):
        return sum(values) / len(values) if values else 0.0

    results = {}

    for config in configs:
        metrics = {"precision": [], "recall": [], "mrr": []}
        results[config["name"]] = metrics

        for test in test_queries:
            search_results = search_fn(
                test["query"],
                test["vector"],
                keyword_weight=config["keyword_weight"],
                vector_weight=config["vector_weight"],
            )

            retrieved = [_doc_id(r) for r in list(search_results)[:10]]
            relevant = set(test["relevant_docs"])
            hits = len(set(retrieved) & relevant)

            metrics["precision"].append(hits / len(retrieved) if retrieved else 0.0)
            metrics["recall"].append(hits / len(relevant) if relevant else 0.0)
            # Reciprocal rank of the first relevant hit (1-based), else 0.
            metrics["mrr"].append(
                next(
                    (
                        1 / rank
                        for rank, doc_id in enumerate(retrieved, start=1)
                        if doc_id in relevant
                    ),
                    0.0,
                )
            )

    return {
        name: {
            "avg_precision": _mean(m["precision"]),
            "avg_recall": _mean(m["recall"]),
            "avg_mrr": _mean(m["mrr"]),
        }
        for name, m in results.items()
    }

Best Practices

- Always use hybrid search for production RAG.
- Add semantic reranking for a final relevance boost.
- Tune weights based on your data and queries.
- Measure quality with representative test queries.
- Consider the query type when setting weights.

Conclusion

Hybrid search is essential for production RAG systems. The combination of vector (semantic similarity) and keyword (exact matching) with semantic reranking delivers the best results across diverse query types.