1 min read
Implementing RAG with LangChain and Azure OpenAI
I wrote “Implementing RAG with LangChain and Azure OpenAI” to share practical, production-minded guidance on this topic.
Setting Up the RAG Pipeline
Configure LangChain with Azure services:
from typing import Optional

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import AzureSearch
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
class RAGPipeline:
    """Retrieval-augmented generation pipeline built from LangChain Azure components.

    The constructor wires up three collaborators that the other methods use:
    ``self.llm`` (chat model), ``self.embeddings`` (embedding model) and
    ``self.vector_store`` (``AzureSearch`` index used for storage/retrieval).
    """

    def __init__(
        self,
        azure_endpoint: str,
        api_key: str,
        deployment_name: str,
        embedding_deployment: str,
        search_endpoint: str,
        search_key: str,
        index_name: str,
    ):
        """Create the chat model, embedding model and vector store clients.

        Args:
            azure_endpoint: Endpoint shared by the chat and embedding models.
            api_key: API key shared by the chat and embedding models.
            deployment_name: Deployment used for the chat model.
            embedding_deployment: Deployment used for the embedding model.
            search_endpoint: Endpoint passed to the ``AzureSearch`` store.
            search_key: Key passed to the ``AzureSearch`` store.
            index_name: Name of the search index to read from and write to.
        """
        self.llm = AzureChatOpenAI(
            azure_endpoint=azure_endpoint,
            api_key=api_key,
            deployment_name=deployment_name,
            temperature=0.1,
        )
        self.embeddings = AzureOpenAIEmbeddings(
            azure_endpoint=azure_endpoint,
            api_key=api_key,
            deployment=embedding_deployment,
        )
        self.vector_store = AzureSearch(
            azure_search_endpoint=search_endpoint,
            azure_search_key=search_key,
            index_name=index_name,
            embedding_function=self.embeddings.embed_query,
        )

    def ingest_documents(self, documents: list[str], metadata: Optional[list[dict]] = None):
        """Split documents into chunks and add them to the vector store.

        Args:
            documents: Raw document texts to split and store.
            metadata: Optional per-document metadata, one dict per entry of
                ``documents`` in the same order. ``None`` or an empty list
                means "no metadata". Every chunk receives a copy of its
                document's dict plus a ``chunk_index`` key giving the chunk's
                position within that document.

        Raises:
            ValueError: If ``metadata`` is non-empty and its length differs
                from the number of documents.
        """
        # Validate before doing any work so a mismatch cannot surface as an
        # IndexError halfway through the loop or silently drop extra entries.
        if metadata and len(metadata) != len(documents):
            raise ValueError(
                f"metadata has {len(metadata)} entries but {len(documents)} "
                "documents were given; expected one metadata dict per document"
            )
        if not documents:
            return

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", ". ", " ", ""],
        )
        chunks = []
        chunk_metadata = []
        for i, doc in enumerate(documents):
            doc_chunks = text_splitter.split_text(doc)
            doc_meta = metadata[i] if metadata else {}
            chunks.extend(doc_chunks)
            chunk_metadata.extend(
                {**doc_meta, "chunk_index": j} for j in range(len(doc_chunks))
            )

        # Nothing to store (e.g. every document was empty): skip the store call.
        if not chunks:
            return
        self.vector_store.add_texts(chunks, metadatas=chunk_metadata)
Building the QA Chain
Create a retrieval chain with custom prompts:
def create_qa_chain(self) -> RetrievalQA:
    """Build a ``RetrievalQA`` chain over this pipeline's vector store.

    The chain uses ``self.llm`` with the "stuff" chain type, a similarity
    retriever configured with ``k=5``, a custom prompt taking ``context`` and
    ``question``, and is set to return its source documents.
    """
    answer_prompt = PromptTemplate(
        template="""Use the following context to answer the question.
If you cannot answer based on the context, say "I don't have enough information to answer this question."
Context:
{context}
Question: {question}
Answer: Provide a clear, concise answer based on the context above.""",
        input_variables=["context", "question"],
    )
    return RetrievalQA.from_chain_type(
        llm=self.llm,
        chain_type="stuff",
        retriever=self.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        ),
        return_source_documents=True,
        chain_type_kwargs={"prompt": answer_prompt},
    )
def answer_question(self, question: str) -> dict:
    """Answer ``question`` with a newly created QA chain.

    Returns:
        A dict with ``"answer"`` (the chain's ``"result"`` value) and
        ``"sources"`` (the ``metadata`` of each returned source document).
    """
    response = self.create_qa_chain().invoke({"query": question})
    answer = response["result"]
    source_metadata = []
    for source_doc in response["source_documents"]:
        source_metadata.append(source_doc.metadata)
    return {"answer": answer, "sources": source_metadata}
Optimizing Retrieval Quality
Experiment with chunk sizes, overlap, and retrieval strategies. Hybrid search combining semantic and keyword matching often provides the best results for enterprise applications.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.