
AI Application Patterns for 2025: Building Production-Ready AI Systems

Building AI applications calls for different patterns than traditional software. Let’s walk through six proven patterns for production AI systems in 2025.

Pattern 1: Retrieval-Augmented Generation (RAG)

The most common pattern for knowledge-based AI:

from azure.ai.foundry import AIFoundryClient
from azure.search.documents import SearchClient
from azure.identity import DefaultAzureCredential

class RAGApplication:
    def __init__(self):
        self.ai_client = AIFoundryClient(
            project="my-project",
            credential=DefaultAzureCredential()
        )
        self.search_client = SearchClient(
            endpoint="https://search.search.windows.net",
            index_name="documents",
            credential=DefaultAzureCredential()
        )

    def retrieve(self, query: str, top_k: int = 5) -> list[dict]:
        """Retrieve relevant documents."""
        # Generate query embedding
        embedding = self.ai_client.embeddings.create(
            deployment="embeddings",
            input=[query]
        ).data[0].embedding

        # Vector search
        results = self.search_client.search(
            search_text=query,
            vector_queries=[{
                "vector": embedding,
                "k_nearest_neighbors": top_k,
                "fields": "content_vector"
            }],
            select=["title", "content", "source"]
        )

        return [{"title": r["title"], "content": r["content"], "source": r["source"]}
                for r in results]

    def generate(self, query: str, context: list[dict]) -> str:
        """Generate answer using retrieved context."""
        context_text = "\n\n".join([
            f"Source: {doc['source']}\n{doc['content']}"
            for doc in context
        ])

        response = self.ai_client.chat.complete(
            deployment="gpt-4o",
            messages=[
                {"role": "system", "content": f"""Answer questions based on the provided context.
                If the context doesn't contain the answer, say so.
                Always cite your sources.

                Context:
                {context_text}"""},
                {"role": "user", "content": query}
            ]
        )

        return response.choices[0].message.content

    def query(self, question: str) -> dict:
        """Full RAG pipeline."""
        # Retrieve
        context = self.retrieve(question)

        # Generate
        answer = self.generate(question, context)

        return {
            "question": question,
            "answer": answer,
            "sources": [doc["source"] for doc in context]
        }
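
A usage sketch, assuming the documents index is already populated and the embeddings and gpt-4o deployments above exist in the project:

rag = RAGApplication()
result = rag.query("What is our refund policy?")

print(result["answer"])
for source in result["sources"]:
    print("Source:", source)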

Pattern 2: Agent with Tools

For tasks requiring actions beyond text generation:

from azure.ai.foundry.agents import Agent, Tool, AgentExecutor

class DataAnalystAgent:
    def __init__(self, ai_client):
        self.ai_client = ai_client
        self.tools = self._setup_tools()
        self.agent = self._setup_agent()

    def _setup_tools(self) -> list[Tool]:
        return [
            Tool(
                name="sql_query",
                description="Execute SQL query against the warehouse",
                function=self._execute_sql
            ),
            Tool(
                name="python_code",
                description="Execute Python code for data analysis",
                function=self._execute_python
            ),
            Tool(
                name="create_chart",
                description="Create a visualization",
                function=self._create_chart
            ),
            Tool(
                name="save_file",
                description="Save results to a file",
                function=self._save_file
            )
        ]

    def _setup_agent(self) -> Agent:
        return Agent(
            model="gpt-4o",
            instructions="""You are a data analyst. Help users analyze data by:
            1. Understanding their question
            2. Writing and executing SQL queries
            3. Analyzing results with Python if needed
            4. Creating visualizations
            5. Explaining your findings

            Always show your work and explain your reasoning.""",
            tools=self.tools,
            max_iterations=10
        )

    async def analyze(self, request: str) -> dict:
        executor = AgentExecutor(self.agent)
        result = await executor.run(request)
        return {
            "request": request,
            "response": result.final_answer,
            "steps": result.execution_steps,
            "artifacts": result.artifacts  # Charts, files, etc.
        }

    def _execute_sql(self, query: str) -> str:
        # Implementation
        pass

    def _execute_python(self, code: str) -> str:
        # Safe execution in sandbox
        pass

    def _create_chart(self, spec: dict) -> str:
        # Create and return chart URL
        pass

    def _save_file(self, content: str, filename: str) -> str:
        # Save and return path
        pass
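
With the tool stubs implemented, running the agent is a single async call. A usage sketch, assuming the client is constructed the same way as in Pattern 1:

import asyncio
from azure.ai.foundry import AIFoundryClient
from azure.identity import DefaultAzureCredential

ai_client = AIFoundryClient(
    project="my-project",
    credential=DefaultAzureCredential()
)

agent = DataAnalystAgent(ai_client)
result = asyncio.run(agent.analyze(
    "What were our top 5 products by revenue last quarter? Chart them."
))
print(result["response"])
for step in result["steps"]:
    print(step)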

Pattern 3: Chain of Thought (CoT)

For complex reasoning tasks:

class ChainOfThoughtReasoner:
    def __init__(self, ai_client):
        self.ai_client = ai_client

    async def reason(self, problem: str, steps: int = 5) -> dict:
        """Multi-step reasoning with explicit thought process."""

        messages = [
            {"role": "system", "content": """You are a careful analytical thinker.
            For each problem:
            1. Break it into smaller sub-problems
            2. Solve each step explicitly
            3. Show your reasoning
            4. Verify your answer

            Think step by step."""},
            {"role": "user", "content": f"Problem: {problem}\n\nLet's think through this step by step."}
        ]

        thoughts = []

        for step in range(steps):
            response = await self.ai_client.chat.complete_async(
                deployment="gpt-4o",
                messages=messages,
                temperature=0.2  # Low temperature for reasoning
            )

            thought = response.choices[0].message.content
            thoughts.append({"step": step + 1, "thought": thought})

            # Check if we've reached a conclusion
            if "final answer" in thought.lower() or "conclusion" in thought.lower():
                break

            # Continue reasoning
            messages.append({"role": "assistant", "content": thought})
            messages.append({"role": "user", "content": "Continue your analysis. What's the next step?"})

        # Extract final answer
        final = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=messages + [
                {"role": "user", "content": "Summarize your final answer in a clear, concise statement."}
            ]
        )

        return {
            "problem": problem,
            "reasoning_chain": thoughts,
            "final_answer": final.choices[0].message.content
        }
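
Usage follows the same shape, reusing the ai_client and asyncio setup from Pattern 2. A sketch with a small quantitative problem, the kind where explicit steps pay off:

reasoner = ChainOfThoughtReasoner(ai_client)
result = asyncio.run(reasoner.reason(
    "A warehouse ships 1,200 orders a day with a 2% error rate, and each "
    "error costs $8 to fix. Automation cuts errors by 60% but adds $0.15 "
    "per order. Does it pay for itself?"
))
for step in result["reasoning_chain"]:
    print(f"Step {step['step']}:\n{step['thought']}\n")
print("Answer:", result["final_answer"])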

Pattern 4: Consensus / Ensemble

For high-stakes decisions requiring verification:

import asyncio

class ConsensusGenerator:
    def __init__(self, ai_client):
        self.ai_client = ai_client

    async def generate_with_consensus(self, prompt: str, n_opinions: int = 3) -> dict:
        """Generate multiple responses and synthesize."""

        # Generate diverse opinions
        tasks = []
        for i in range(n_opinions):
            tasks.append(self._generate_opinion(prompt, perspective=i))

        opinions = await asyncio.gather(*tasks)

        # Synthesize consensus
        synthesis = await self._synthesize(prompt, opinions)

        return {
            "prompt": prompt,
            "opinions": opinions,
            "consensus": synthesis["consensus"],
            "confidence": synthesis["confidence"],
            "dissenting_views": synthesis["dissent"]
        }

    async def _generate_opinion(self, prompt: str, perspective: int) -> dict:
        perspectives = [
            "Consider this from a conservative, risk-averse viewpoint.",
            "Consider this from an innovative, forward-thinking viewpoint.",
            "Consider this from a practical, implementation-focused viewpoint."
        ]

        response = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=[
                {"role": "system", "content": perspectives[perspective % len(perspectives)]},
                {"role": "user", "content": prompt}
            ],
            temperature=0.8  # Higher temperature for diversity
        )

        return {
            "perspective": perspectives[perspective % len(perspectives)],
            "response": response.choices[0].message.content
        }

    async def _synthesize(self, original_prompt: str, opinions: list[dict]) -> dict:
        opinions_text = "\n\n".join([
            f"Perspective: {o['perspective']}\nResponse: {o['response']}"
            for o in opinions
        ])

        response = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=[
                {"role": "system", "content": """Synthesize multiple expert opinions into a consensus view.
                Identify points of agreement and disagreement.
                Provide a confidence level for the consensus."""},
                {"role": "user", "content": f"""Original question: {original_prompt}

                Expert opinions:
                {opinions_text}

                Provide:
                1. Consensus view
                2. Confidence level (0-1)
                3. Key dissenting points"""}
            ]
        )

        # Parse structured response
        return self._parse_synthesis(response.choices[0].message.content)

    def _parse_synthesis(self, text: str) -> dict:
        # Naive fallback: return the raw synthesis text; in production,
        # ask the model for JSON output and parse that instead
        return {"consensus": text, "confidence": None, "dissent": []}
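
A usage sketch, again reusing the client and event loop from earlier patterns. The prompt is deliberately a judgment call; consensus adds little to questions with a single correct answer:

generator = ConsensusGenerator(ai_client)
result = asyncio.run(generator.generate_with_consensus(
    "Should we migrate our monolith to microservices this year?"
))
print("Consensus:", result["consensus"])
print("Confidence:", result["confidence"])
for view in result["dissenting_views"]:
    print("Dissent:", view)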

Pattern 5: Guardrailed Generation

For safe, controlled outputs:

from azure.ai.foundry.safety import (
    ContentFilter,
    GuardrailChain,
    PromptInjectionDetector,
    FactualityChecker
)

class GuardrailedGenerator:
    def __init__(self, ai_client):
        self.ai_client = ai_client
        self.guardrails = self._setup_guardrails()

    def _setup_guardrails(self) -> GuardrailChain:
        return GuardrailChain([
            # Input validation
            ContentFilter(
                name="input_filter",
                block_categories=["hate", "violence", "self_harm"],
                action="block"
            ),
            # Prompt injection detection
            PromptInjectionDetector(
                name="injection_detector",
                sensitivity="high"
            ),
            # Output validation
            ContentFilter(
                name="output_filter",
                block_categories=["pii", "confidential"],
                action="redact"
            ),
            # Factuality check
            FactualityChecker(
                name="fact_checker",
                threshold=0.7
            )
        ])

    async def generate(self, prompt: str) -> dict:
        # Pre-generation checks
        input_check = await self.guardrails.check_input(prompt)
        if not input_check.passed:
            return {
                "blocked": True,
                "reason": input_check.reason
            }

        # Generate
        response = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=[{"role": "user", "content": prompt}]
        )

        output = response.choices[0].message.content

        # Post-generation checks
        output_check = await self.guardrails.check_output(output)

        return {
            "blocked": False,
            "output": output_check.content,  # Potentially redacted
            "warnings": output_check.warnings,
            "factuality_score": output_check.factuality_score
        }
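
Callers must branch on blocked, since a blocked request returns no output at all. A usage sketch with the same client as before:

generator = GuardrailedGenerator(ai_client)
result = asyncio.run(generator.generate("Summarize our Q3 incident report."))

if result["blocked"]:
    print("Blocked:", result["reason"])
else:
    print(result["output"])
    for warning in result["warnings"]:
        print("Warning:", warning)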

Pattern 6: Iterative Refinement

For high-quality outputs through self-improvement:

class IterativeRefiner:
    def __init__(self, ai_client):
        self.ai_client = ai_client

    async def generate_and_refine(self, prompt: str, max_iterations: int = 3) -> dict:
        """Generate, critique, and refine iteratively."""

        # Initial generation
        current = await self._generate(prompt)
        iterations = [{"version": 1, "content": current}]

        for i in range(max_iterations - 1):
            # Self-critique
            critique = await self._critique(prompt, current)

            # Check if good enough
            if critique["score"] >= 0.9:
                break

            # Refine based on critique
            current = await self._refine(prompt, current, critique)
            iterations.append({
                "version": i + 2,
                "content": current,
                "improvements": critique["suggestions"]
            })

        return {
            "final": current,
            "iterations": iterations,
            "refinement_count": len(iterations)
        }

    async def _generate(self, prompt: str) -> str:
        response = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    async def _critique(self, original_prompt: str, content: str) -> dict:
        response = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=[
                {"role": "system", "content": """Critically evaluate this response.
                Score it 0-1 and provide specific improvement suggestions."""},
                {"role": "user", "content": f"Original request: {original_prompt}\n\nResponse to evaluate:\n{content}"}
            ]
        )
        return self._parse_critique(response.choices[0].message.content)

    def _parse_critique(self, text: str) -> dict:
        # Naive fallback with a conservative score; in production, ask the
        # model for JSON matching {"score": float, "suggestions": str}
        return {"score": 0.0, "suggestions": text}

    async def _refine(self, original_prompt: str, content: str, critique: dict) -> str:
        response = await self.ai_client.chat.complete_async(
            deployment="gpt-4o",
            messages=[
                {"role": "user", "content": f"""Improve this response based on feedback.

                Original request: {original_prompt}

                Current response: {content}

                Feedback: {critique['suggestions']}

                Provide an improved version."""}
            ]
        )
        return response.choices[0].message.content
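
A usage sketch; note that with the naive _parse_critique fallback above, the score never reaches 0.9, so the loop runs the full iteration budget until real parsing is wired in:

refiner = IterativeRefiner(ai_client)
result = asyncio.run(refiner.generate_and_refine(
    "Write a runbook for rotating our production database credentials."
))
print(f"Refined across {result['refinement_count']} version(s)")
print(result["final"])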

These patterns form the building blocks of production AI applications. Combine them based on your specific requirements, and always include proper error handling, logging, and monitoring.
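
As one example of combining patterns, the RAG pipeline from Pattern 1 can be wrapped in the guardrails from Pattern 5 so every answer is checked on the way in and on the way out. A minimal sketch using only the classes defined above:

class GuardedRAG:
    def __init__(self, ai_client):
        self.rag = RAGApplication()
        self.guard = GuardrailedGenerator(ai_client)

    async def query(self, question: str) -> dict:
        # Reject unsafe questions before spending retrieval and generation tokens
        input_check = await self.guard.guardrails.check_input(question)
        if not input_check.passed:
            return {"blocked": True, "reason": input_check.reason}

        # Standard retrieve-then-generate pipeline from Pattern 1
        result = self.rag.query(question)

        # Redact or flag unsafe content before returning the answer
        output_check = await self.guard.guardrails.check_output(result["answer"])
        result["answer"] = output_check.content
        result["warnings"] = output_check.warnings
        return result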

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.