# Building AI Applications: Choosing the Right Platform and Architecture
The AI application landscape has matured significantly. Let’s explore how to choose the right platform and architecture for your enterprise AI applications.
## The AI Application Stack
Modern AI applications typically consist of:
```
┌─────────────────────────────────────────┐
│             User Interface              │
│     (Web, Mobile, Chat, Voice, API)     │
├─────────────────────────────────────────┤
│            Application Layer            │
│     (Business Logic, Orchestration)     │
├─────────────────────────────────────────┤
│                AI Layer                 │
│    (Models, Agents, Embeddings, RAG)    │
├─────────────────────────────────────────┤
│               Data Layer                │
│      (Vector DB, Lakehouse, Cache)      │
├─────────────────────────────────────────┤
│             Infrastructure              │
│      (Compute, Network, Security)       │
└─────────────────────────────────────────┘
```
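Keeping these layers behind explicit interfaces pays off when you later swap models, vector stores, or orchestration frameworks. Here is a minimal sketch of what those seams can look like in code; the protocol and method names are illustrative, not from any SDK:

```python
from typing import Protocol


class AILayer(Protocol):
    """Boundary between application logic and models/agents/RAG."""
    async def answer(self, question: str) -> str: ...


class DataLayer(Protocol):
    """Boundary between AI components and storage (vector DB, cache)."""
    async def retrieve(self, query: str, top_k: int) -> list[str]: ...


class ChatApplication:
    """Application layer: owns business logic, depends only on interfaces."""

    def __init__(self, ai: AILayer):
        self.ai = ai

    async def handle_message(self, user_id: str, message: str) -> str:
        # Auth, logging, and rate limiting live here,
        # not inside the AI layer.
        return await self.ai.answer(message)
```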
## Architecture Patterns

### Pattern 1: Simple RAG Application
```python
from azure.ai.foundry import AIFoundryClient
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizedQuery


class SimpleRAGApp:
    """Basic RAG pattern for document Q&A."""

    def __init__(self):
        self.ai_client = AIFoundryClient(...)
        self.search_client = SearchClient(...)

    async def answer_question(self, question: str) -> str:
        # 1. Generate embedding for the question
        embedding = await self.ai_client.embeddings.create(
            model="text-embedding-3-large",
            input=question
        )

        # 2. Search for relevant documents (hybrid: keyword + vector)
        vector_query = VectorizedQuery(
            vector=embedding.data[0].embedding,
            k_nearest_neighbors=5,
            fields="content_vector"
        )
        search_results = self.search_client.search(
            search_text=question,
            vector_queries=[vector_query],
            select=["title", "content", "source"]
        )

        # 3. Build context from search results
        context = "\n\n".join(
            f"Source: {doc['source']}\n{doc['content']}"
            for doc in search_results
        )

        # 4. Generate answer
        response = await self.ai_client.chat.complete(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": f"""Answer questions based on the provided context.
If the answer isn't in the context, say so.

Context:
{context}"""
                },
                {"role": "user", "content": question}
            ]
        )
        return response.choices[0].message.content


# Usage
app = SimpleRAGApp()
answer = await app.answer_question("What is our refund policy?")
```
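Pattern 1 assumes an Azure AI Search index with a `content_vector` field already exists. Here is a sketch of creating one with the azure-search-documents index client; the index name is illustrative, the field names match the query above, and the vector dimensions assume text-embedding-3-large's default of 3072:

```python
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration, SearchField, SearchFieldDataType,
    SearchIndex, SearchableField, SimpleField, VectorSearch,
    VectorSearchProfile,
)

index = SearchIndex(
    name="documents",  # illustrative index name
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="title", type=SearchFieldDataType.String),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="source", type=SearchFieldDataType.String, filterable=True),
        SearchField(
            name="content_vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            vector_search_dimensions=3072,  # text-embedding-3-large
            vector_search_profile_name="default-profile",
        ),
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration(name="hnsw")],
        profiles=[VectorSearchProfile(
            name="default-profile",
            algorithm_configuration_name="hnsw",
        )],
    ),
)

index_client = SearchIndexClient(
    endpoint="https://<your-service>.search.windows.net",
    credential=AzureKeyCredential("<admin-key>"),
)
index_client.create_or_update_index(index)
```

Marking `source` as filterable is what enables the "Simple RAG with filters" variant mentioned in the table below.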
### Pattern 2: Multi-Agent System
```python
from azure.ai.foundry import AIFoundryClient
from azure.ai.foundry.agents import (
    Agent, Orchestra, Router,
    WebSearchTool, DocumentSearchTool,
    SQLQueryTool, CalculatorTool, DocumentGeneratorTool,
)


class MultiAgentSystem:
    """Complex system with specialized agents."""

    def __init__(self):
        self.client = AIFoundryClient(...)

        # Define specialized agents
        self.research_agent = Agent(
            name="Researcher",
            model="gpt-4o",
            instructions="""You research topics thoroughly.
Use web search and document retrieval.""",
            tools=[WebSearchTool(), DocumentSearchTool()]
        )
        self.analyst_agent = Agent(
            name="Analyst",
            model="gpt-4o",
            instructions="""You analyze data and create insights.
Use SQL queries and calculations.""",
            tools=[SQLQueryTool(), CalculatorTool()]
        )
        self.writer_agent = Agent(
            name="Writer",
            model="gpt-4o",
            instructions="""You write clear, professional content.
Create reports, summaries, and documentation.""",
            tools=[DocumentGeneratorTool()]
        )

        # Create router for intelligent task distribution
        self.router = Router(
            agents=[self.research_agent, self.analyst_agent, self.writer_agent],
            strategy="semantic"  # Routes based on task understanding
        )

        # Create orchestra for multi-agent coordination
        self.orchestra = Orchestra(
            agents=[self.research_agent, self.analyst_agent, self.writer_agent],
            coordinator_model="gpt-4o"
        )

    async def handle_simple_task(self, task: str) -> str:
        """Route to the single best agent."""
        agent = await self.router.select_agent(task)
        response = await agent.run(task)
        return response.content

    async def handle_complex_task(self, task: str) -> str:
        """Coordinate multiple agents."""
        result = await self.orchestra.run(
            task=task,
            max_iterations=10,
            require_approval=False
        )
        return result.final_output


# Usage
system = MultiAgentSystem()

# Simple task - routed to a single agent
answer = await system.handle_simple_task(
    "What were our sales numbers last month?"
)

# Complex task - orchestrated across agents
report = await system.handle_complex_task(
    """Create a market analysis report:
    1. Research our top 3 competitors
    2. Analyze our sales trends vs market
    3. Write an executive summary with recommendations"""
)
```
### Pattern 3: Event-Driven AI Pipeline
```python
import json

from azure.functions import FunctionApp
from azure.ai.foundry import AIFoundryClient

app = FunctionApp()


@app.service_bus_queue_trigger(
    arg_name="msg",
    queue_name="document-processing",
    connection="SERVICE_BUS_CONNECTION"
)
async def process_document(msg: str):
    """Event-driven document processing pipeline."""
    client = AIFoundryClient(...)

    event = json.loads(msg)
    document_url = event["document_url"]
    document_type = event["document_type"]

    # 1. Extract content
    content = await extract_document_content(document_url)

    # 2. Classify document
    classification = await client.chat.complete(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "Classify documents into categories: invoice, contract, report, correspondence"
            },
            {"role": "user", "content": f"Classify: {content[:2000]}"}
        ],
        response_format={"type": "json_object"}
    )
    doc_class = json.loads(classification.choices[0].message.content)

    # 3. Extract entities based on classification
    extraction_prompt = get_extraction_prompt(doc_class["category"])
    entities = await client.chat.complete(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": extraction_prompt},
            {"role": "user", "content": content}
        ],
        response_format={"type": "json_object"}
    )

    # 4. Store results
    await store_processed_document(
        document_url=document_url,
        classification=doc_class,
        entities=json.loads(entities.choices[0].message.content)
    )

    # 5. Trigger downstream workflows
    if doc_class["category"] == "invoice":
        await trigger_invoice_workflow(event["document_id"])
```
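The function above runs whenever a message lands on the `document-processing` queue. For completeness, here is a sketch of the producer side that enqueues those events with azure-servicebus; the payload fields match what the trigger reads, and `enqueue_document` is an illustrative helper name:

```python
import json
import os

from azure.servicebus import ServiceBusMessage
from azure.servicebus.aio import ServiceBusClient


async def enqueue_document(document_id: str, document_url: str, document_type: str):
    """Publish a document event to the processing queue."""
    client = ServiceBusClient.from_connection_string(
        os.environ["SERVICE_BUS_CONNECTION"]
    )
    async with client:
        sender = client.get_queue_sender(queue_name="document-processing")
        async with sender:
            await sender.send_messages(ServiceBusMessage(json.dumps({
                "document_id": document_id,
                "document_url": document_url,
                "document_type": document_type,
            })))
```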
## Choosing Your Architecture
| Requirement | Recommended Pattern |
|---|---|
| Simple Q&A | Simple RAG |
| Document search | Simple RAG with filters |
| Task completion | Single Agent |
| Complex workflows | Multi-Agent Orchestra |
| High throughput | Event-Driven Pipeline |
| Real-time chat | Streaming with WebSockets |
| Batch processing | Azure Functions + Queue |
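The real-time chat row references a streaming pattern not shown above. Here is a minimal sketch over WebSockets, using FastAPI for the transport; the `stream=True` flag and chunk shape are assumptions modeled on OpenAI-style streaming, not a confirmed part of the client used in the earlier patterns:

```python
from fastapi import FastAPI, WebSocket
from azure.ai.foundry import AIFoundryClient

app = FastAPI()


@app.websocket("/chat")
async def chat(websocket: WebSocket):
    await websocket.accept()
    client = AIFoundryClient(...)
    while True:
        question = await websocket.receive_text()
        # Assumed: stream=True yields incremental chunks
        # (OpenAI-style deltas); verify against your SDK.
        stream = await client.chat.complete(
            model="gpt-4o",
            messages=[{"role": "user", "content": question}],
            stream=True
        )
        async for chunk in stream:
            delta = chunk.choices[0].delta.content
            if delta:
                # Forward each token batch as it arrives
                await websocket.send_text(delta)
```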
## Production Considerations

### Caching Strategy
```python
import os

from azure.ai.foundry import AIFoundryClient
from azure.ai.foundry.caching import SemanticCache


class CachedAIApp:
    def __init__(self):
        self.cache = SemanticCache(
            connection_string=os.getenv("REDIS_CONNECTION"),
            embedding_model="text-embedding-3-small",
            similarity_threshold=0.95,
            ttl_seconds=3600
        )
        self.client = AIFoundryClient(...)

    async def get_answer(self, question: str) -> str:
        # Check cache first
        cached = await self.cache.get(question)
        if cached:
            return cached

        # Generate new response
        response = await self.client.chat.complete(
            model="gpt-4o",
            messages=[{"role": "user", "content": question}]
        )
        answer = response.choices[0].message.content

        # Cache the response
        await self.cache.set(question, answer)
        return answer
```
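Usage follows the earlier patterns. Because matching is semantic rather than exact, a rephrased question can still hit the cache:

```python
app = CachedAIApp()
answer1 = await app.get_answer("What is our refund policy?")   # miss: calls the model
answer2 = await app.get_answer("What's your refund policy?")   # semantic hit: no model call
```

A threshold of 0.95 is conservative; lowering it raises the hit rate but risks serving a cached answer to a genuinely different question.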
### Error Handling
```python
import logging

from azure.ai.foundry import AIFoundryClient
from azure.ai.foundry.errors import (
    RateLimitError,
    ContentFilterError,
    ModelOverloadedError
)
from tenacity import (
    retry, wait_exponential, stop_after_attempt, retry_if_exception_type
)

logger = logging.getLogger(__name__)


class ResilientAIApp:
    def __init__(self):
        self.client = AIFoundryClient(...)

    @retry(
        wait=wait_exponential(multiplier=1, min=4, max=60),
        stop=stop_after_attempt(3),
        retry=retry_if_exception_type((RateLimitError, ModelOverloadedError))
    )
    async def generate_with_retry(self, prompt: str) -> str:
        try:
            response = await self.client.chat.complete(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}]
            )
            return response.choices[0].message.content
        except ContentFilterError as e:
            # Log and return safe response
            logger.warning(f"Content filtered: {e}")
            return "I'm unable to process this request."
        except RateLimitError:
            # Will be retried by tenacity
            raise
```
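With this setup, transient failures (rate limits, an overloaded model) are retried with exponential backoff, while content-filter hits fail fast with a safe fallback:

```python
app = ResilientAIApp()
summary = await app.generate_with_retry("Summarize our Q3 results.")
```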
The key to successful AI applications is choosing the right architecture for your requirements and building with production considerations from the start.