January 5, 2023 2 min read

ChatGPT Integration Patterns for Enterprise Applications

ChatGPT has captured the world’s attention, and enterprises are racing to understand how to integrate similar capabilities into their applications. Today, let’s explore practical patterns for building ChatGPT-like experiences using Azure OpenAI Service.

The Chat Paradigm Shift

Traditional GPT-3 interactions are stateless: each request is independent, and the model retains nothing from earlier calls. ChatGPT introduced a conversational paradigm with:

Multi-turn conversations
System prompts for behavior control
Context that builds over the conversation

Simulating Chat with Completion API

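While waiting for the official Chat API, we can simulate conversations by flattening the message history into a single Completion API prompt and resending it on every turn: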

import openai
from typing import List, Dict
from dataclasses import dataclass, field
from datetime import datetime

@dataclass
class Message:
    """A single turn in a conversation.

    ``timestamp`` is filled in with ``datetime.now()`` at construction time
    unless the caller supplies one.
    """

    # Who produced this turn: "system", "user", or "assistant".
    role: str
    # The text of the turn.
    content: str
    # When the turn was created (naive local time by default).
    timestamp: datetime = field(default_factory=datetime.now)

class ConversationManager:
    """Manage multi-turn conversations with GPT-3.5 over the Completion API.

    The history is flattened into one text prompt: a ``System:`` line,
    then one ``Human:`` / ``Assistant:`` line per stored message, then a
    trailing ``Assistant:`` cue for the model to continue from. The whole
    prompt is resent on every turn.

    Args:
        deployment: Deployment name passed as ``engine`` to the API.
        system_prompt: Instruction placed on the ``System:`` line. A generic
            helpful-assistant prompt is used when omitted or empty.
        max_context_tokens: Prompt token budget enforced by
            ``_truncate_history``.
        max_response_tokens: ``max_tokens`` sent with each completion request.
        temperature: Sampling temperature sent with each completion request.
    """

    def __init__(
        self,
        deployment: str,
        system_prompt: str = None,
        max_context_tokens: int = 3000,
        max_response_tokens: int = 500,
        temperature: float = 0.7,
    ):
        self.deployment = deployment
        self.system_prompt = system_prompt or "You are a helpful AI assistant."
        self.messages: List[Message] = []
        self.max_context_tokens = max_context_tokens
        self.max_response_tokens = max_response_tokens
        self.temperature = temperature

    def _build_prompt(self) -> str:
        """Build the full prompt from conversation history.

        Every stored message whose role is not ``"user"`` is labelled
        ``Assistant``. Each part already ends in a newline and the parts are
        joined with another, so turns are separated by a blank line.
        """
        prompt_parts = [f"System: {self.system_prompt}\n"]

        for msg in self.messages:
            role_label = "Human" if msg.role == "user" else "Assistant"
            prompt_parts.append(f"{role_label}: {msg.content}\n")

        # Open-ended cue: the model's completion becomes the next reply.
        prompt_parts.append("Assistant:")
        return "\n".join(prompt_parts)

    def _truncate_history(self):
        """Drop the oldest messages until the prompt fits the token budget.

        The system prompt lives in ``self.system_prompt``, not in
        ``self.messages``, so it is never dropped. At least the two most
        recent messages are always kept, even if the prompt still exceeds
        ``max_context_tokens``.
        """
        import tiktoken

        encoding = tiktoken.encoding_for_model("text-davinci-003")

        # Nothing may be removed at two messages or fewer, so tokenizing is
        # skipped entirely in that case.
        while len(self.messages) > 2:
            token_count = len(encoding.encode(self._build_prompt()))
            if token_count <= self.max_context_tokens:
                break
            del self.messages[0]

    def send_message(self, user_message: str) -> str:
        """Send a message and get a response.

        On success both the user turn and the assistant reply are appended to
        ``self.messages``. If anything fails before a reply is obtained, the
        user turn is removed again and the exception is re-raised, so a retry
        does not duplicate the question. Older messages already dropped by
        truncation are not restored.

        Args:
            user_message: Text of the user's turn.

        Returns:
            The assistant's reply with surrounding whitespace stripped.
        """
        user_turn = Message(role="user", content=user_message)
        self.messages.append(user_turn)

        try:
            self._truncate_history()
            response = openai.Completion.create(
                engine=self.deployment,
                prompt=self._build_prompt(),
                max_tokens=self.max_response_tokens,
                temperature=self.temperature,
                # Stop before the model starts writing the next speaker's turn.
                stop=["Human:", "System:"],
            )
            assistant_message = response.choices[0].text.strip()
        except Exception:
            # Truncation only removes from the front, so the pending user
            # turn is still last; the identity check guards against popping
            # anything else.
            if self.messages and self.messages[-1] is user_turn:
                self.messages.pop()
            raise

        self.messages.append(Message(role="assistant", content=assistant_message))
        return assistant_message

    def reset(self):
        """Clear conversation history (the system prompt is kept)."""
        self.messages = []

# Usage
# One manager instance is reused for all three turns, so every call below
# sends the accumulated history along with the new question.
conversation = ConversationManager(
    deployment="gpt35",
    system_prompt="You are a helpful Azure solutions architect. Provide detailed technical guidance."
)

# Multi-turn conversation
response1 = conversation.send_message("What's the best way to store time-series data in Azure?")
print(f"Assistant: {response1}\n")

# "that" only makes sense because the first exchange is part of the prompt.
response2 = conversation.send_message("How would I query that for the last 7 days?")
print(f"Assistant: {response2}\n")

response3 = conversation.send_message("What about cost optimization?")
print(f"Assistant: {response3}")

Session Management Pattern

For web applications, keep each conversation's history in a shared store (Redis here), keyed by a session ID, so that it survives across requests:

import json
import os
import uuid

import redis
from flask import Flask, session, request, jsonify

app = Flask(__name__)
# Flask uses this key to sign session cookies, so a real deployment must
# provide its own value through FLASK_SECRET_KEY. The literal fallback only
# keeps the sample runnable out of the box.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "your-secret-key")

# Redis for session storage. Host and port may be overridden through
# REDIS_HOST / REDIS_PORT; the defaults match a local development server.
redis_client = redis.Redis(
    host=os.environ.get("REDIS_HOST", "localhost"),
    port=int(os.environ.get("REDIS_PORT", "6379")),
    db=0,
)

class ChatSession:
    """Persistent chat session with Redis backing.

    Each session is a Redis list stored under ``chat:<session_id>``. Every
    element is a JSON object with ``role`` and ``content`` keys, appended in
    arrival order.
    """

    SESSION_TTL = 3600  # 1 hour

    @staticmethod
    def _key(session_id: str) -> str:
        """Return the Redis key that holds the messages of ``session_id``."""
        return f"chat:{session_id}"

    @staticmethod
    def get_or_create_session(session_id: str = None) -> str:
        """Get existing session or create new one.

        Args:
            session_id: Candidate id, typically supplied by the client.

        Returns:
            ``session_id`` when a Redis key for it exists; otherwise a fresh
            UUID4 string. Nothing is written to Redis for the new id until
            the first message is saved.
        """
        if session_id and redis_client.exists(ChatSession._key(session_id)):
            return session_id
        return str(uuid.uuid4())

    @staticmethod
    def save_message(session_id: str, role: str, content: str):
        """Append a message to the session and refresh its TTL.

        Both commands are queued on one pipeline and sent together, so a
        failure between them cannot leave the list without an expiry.
        """
        key = ChatSession._key(session_id)
        message = json.dumps({"role": role, "content": content})
        with redis_client.pipeline() as pipe:
            pipe.rpush(key, message)
            pipe.expire(key, ChatSession.SESSION_TTL)
            pipe.execute()

    @staticmethod
    def get_messages(session_id: str) -> List[Dict]:
        """Get all messages in a session, oldest first.

        Returns an empty list when the session has no stored messages.
        """
        raw_messages = redis_client.lrange(ChatSession._key(session_id), 0, -1)
        return [json.loads(raw) for raw in raw_messages]

    @staticmethod
    def delete_session(session_id: str):
        """Delete a session and all of its messages."""
        redis_client.delete(ChatSession._key(session_id))

@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn.

    Expects a JSON object with a non-empty string ``message`` and an optional
    ``session_id``. Responds with ``{"session_id": ..., "response": ...}``,
    or with HTTP 400 and an ``error`` field when the body is invalid.

    A ``session_id`` that has no stored history (unknown or expired) is
    replaced by a freshly generated one, which is returned to the client.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_message = data.get('message')
    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({"error": "'message' must be a non-empty string"}), 400

    # Validate the client-supplied id instead of trusting it blindly.
    session_id = ChatSession.get_or_create_session(data.get('session_id'))

    # Rebuild the conversation from the turns stored so far.
    conversation = ConversationManager(deployment="gpt35")
    for msg in ChatSession.get_messages(session_id):
        conversation.messages.append(Message(role=msg['role'], content=msg['content']))

    response = conversation.send_message(user_message)

    # Persist both turns only after the model answered, so a failed call
    # never leaves an unanswered user message in the stored history.
    ChatSession.save_message(session_id, "user", user_message)
    ChatSession.save_message(session_id, "assistant", response)

    return jsonify({
        "session_id": session_id,
        "response": response
    })

@app.route('/chat/history/<session_id>', methods=['GET'])
def get_history(session_id):
    """Return the stored messages of ``session_id`` as ``{"messages": [...]}``."""
    return jsonify({"messages": ChatSession.get_messages(session_id)})

Streaming Responses

For better UX, stream responses as they’re generated:

import openai
from typing import Generator

def stream_completion(prompt: str, deployment: str) -> Generator[str, None, None]:
    """Stream completion text as it is generated.

    Args:
        prompt: Full prompt text sent to the Completion API.
        deployment: Deployment name passed as ``engine``.

    Yields:
        Each non-empty text fragment, in the order the service returns them.
    """
    response = openai.Completion.create(
        engine=deployment,
        prompt=prompt,
        max_tokens=500,
        temperature=0.7,
        stream=True
    )

    for chunk in response:
        choices = chunk.choices
        # A chunk with an empty ``choices`` list carries no text; indexing
        # it would raise IndexError and abort the whole stream.
        if not choices:
            continue
        text = choices[0].text
        if text:
            yield text

# Flask SSE endpoint
from flask import Response

@app.route('/chat/stream', methods=['POST'])
def chat_stream():
    """Stream a completion for ``prompt`` as server-sent events.

    Expects a JSON object with a non-empty string ``prompt``; responds with
    HTTP 400 and an ``error`` field otherwise. Each event carries
    ``{"token": ...}`` as JSON, and the stream ends with a literal
    ``[DONE]`` event.
    """
    data = request.get_json(silent=True)
    prompt = data.get('prompt') if isinstance(data, dict) else None
    if not isinstance(prompt, str) or not prompt:
        return jsonify({"error": "'prompt' must be a non-empty string"}), 400

    def generate():
        for token in stream_completion(prompt, "gpt35"):
            # JSON-encode the token so embedded newlines cannot break the
            # "data: ...\n\n" event framing.
            yield f"data: {json.dumps({'token': token})}\n\n"
        yield "data: [DONE]\n\n"

    return Response(generate(), mimetype='text/event-stream')

Frontend Integration

Here’s a React component for the chat interface:

import React, { useState, useEffect, useRef } from 'react';

/** One chat turn as rendered by the component. */
interface Message {
  // Speaker of the turn; also used as a CSS class name on the message element.
  role: 'user' | 'assistant';
  // Text shown for the turn.
  content: string;
}

/**
 * Minimal chat UI backed by the POST /chat endpoint.
 *
 * Keeps the visible transcript and the server-issued session id in local
 * state, and scrolls to the newest message whenever the transcript changes.
 */
const ChatComponent: React.FC = () => {
  const [messages, setMessages] = useState<Message[]>([]);
  const [input, setInput] = useState('');
  const [sessionId, setSessionId] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const messagesEndRef = useRef<HTMLDivElement>(null);

  const scrollToBottom = () => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  };

  useEffect(scrollToBottom, [messages]);

  const sendMessage = async () => {
    // Ignore blank input, and refuse to start a second request while one is
    // in flight: the Send button is disabled then, but Enter is not.
    if (!input.trim() || isLoading) return;

    const userMessage: Message = { role: 'user', content: input };
    setMessages(prev => [...prev, userMessage]);
    setInput('');
    setIsLoading(true);

    try {
      const response = await fetch('/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          session_id: sessionId,
          // `input` still holds the submitted text in this closure even
          // though setInput('') has been called above.
          message: input
        })
      });

      // fetch() only rejects on network failure; without this check an HTTP
      // error body would be rendered as an assistant reply.
      if (!response.ok) {
        throw new Error(`Chat request failed with status ${response.status}`);
      }

      const data = await response.json();
      // The server may issue a new id (first turn or expired session).
      setSessionId(data.session_id);

      const assistantMessage: Message = {
        role: 'assistant',
        content: data.response
      };
      setMessages(prev => [...prev, assistantMessage]);
    } catch (error) {
      console.error('Error sending message:', error);
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <div className="chat-container">
      <div className="messages">
        {messages.map((msg, idx) => (
          <div key={idx} className={`message ${msg.role}`}>
            <strong>{msg.role === 'user' ? 'You' : 'Assistant'}:</strong>
            <p>{msg.content}</p>
          </div>
        ))}
        {isLoading && <div className="loading">Thinking...</div>}
        {/* Invisible anchor that scrollToBottom() scrolls into view. */}
        <div ref={messagesEndRef} />
      </div>
      <div className="input-area">
        <input
          type="text"
          value={input}
          onChange={(e) => setInput(e.target.value)}
          onKeyPress={(e) => e.key === 'Enter' && sendMessage()}
          placeholder="Type your message..."
        />
        <button onClick={sendMessage} disabled={isLoading}>
          Send
        </button>
      </div>
    </div>
  );
};

export default ChatComponent;

System Prompts for Different Use Cases

System prompts shape the assistant’s behavior:

# Persona prompts keyed by specialty. Each value is an introductory sentence
# followed by one "- " bullet per behavioral rule, joined with newlines.
SYSTEM_PROMPTS = {
    "customer_support": "\n".join((
        "You are a customer support agent for Contoso Ltd.",
        "- Be helpful, empathetic, and professional",
        "- If you don't know something, say so and offer to escalate",
        "- Never make promises about refunds or compensation without checking",
        "- Keep responses concise but complete",
    )),
    "code_reviewer": "\n".join((
        "You are a senior software engineer reviewing code.",
        "- Focus on bugs, security issues, and performance problems",
        "- Suggest improvements with code examples",
        "- Be constructive, not critical",
        "- Explain the 'why' behind your suggestions",
    )),
    "data_analyst": "\n".join((
        "You are a data analyst assistant.",
        "- Help users understand their data and write queries",
        "- Suggest appropriate visualizations",
        "- Explain statistical concepts in simple terms",
        "- Always consider data privacy and security",
    )),
}

def create_specialized_assistant(specialty: str, deployment: str = "gpt35") -> ConversationManager:
    """Create a conversation manager with specialized system prompt.

    Args:
        specialty: Key into ``SYSTEM_PROMPTS``. An unknown key falls back to
            the ``"customer_support"`` prompt rather than raising.
        deployment: Deployment name handed to ``ConversationManager``.

    Returns:
        A new ``ConversationManager`` with an empty history.
    """
    system_prompt = SYSTEM_PROMPTS.get(specialty, SYSTEM_PROMPTS["customer_support"])
    return ConversationManager(deployment=deployment, system_prompt=system_prompt)

Best Practices

Manage Context Window: Keep conversation history within token limits
Persist Sessions: Use Redis or similar for session persistence
Stream Responses: Improve perceived performance with streaming
Rate Limit: Implement per-user rate limiting
Log Everything: Track conversations for improvement and compliance

What’s Coming

Microsoft has announced that the official Chat Completion API (similar to the one ChatGPT uses) is coming to Azure OpenAI Service. This will provide:

Native multi-turn support
Better context management
Improved instruction following

Until then, these patterns will help you build production-ready chat experiences.