6 min read
Completion vs Chat APIs in Azure OpenAI: Choosing the Right Approach
Azure OpenAI offers two main API paradigms: the Completion API and the Chat Completion API. Understanding when to use each is crucial for building effective AI applications. Let’s explore the differences, use cases, and migration strategies.
API Comparison
Completion API
The original — and now legacy — API for text generation:
import openai

# Completion API (legacy): one free-form prompt string in, generated text out.
response = openai.Completion.create(
    engine="text-davinci-003",  # Azure deployment name, not the raw model id
    prompt="Translate the following to French: Hello, how are you?",
    max_tokens=100,
    temperature=0.7
)
# Generated text lives directly on the choice object.
print(response.choices[0].text)
Chat Completion API
Message-based API with roles:
import openai

# Chat Completion API: structured list of role-tagged messages in,
# an assistant message out.
response = openai.ChatCompletion.create(
    engine="gpt-35-turbo",  # Azure deployment name
    messages=[
        {"role": "system", "content": "You are a translator."},
        {"role": "user", "content": "Translate to French: Hello, how are you?"}
    ],
    max_tokens=100,
    temperature=0.7
)
# Generated text is nested one level deeper than in the Completion API.
print(response.choices[0].message.content)
Key Differences
from dataclasses import dataclass
from typing import List, Optional
from enum import Enum
class APIType(Enum):
    """The two Azure OpenAI text-generation API paradigms."""
    COMPLETION = "completion"
    CHAT = "chat"
@dataclass
class APIComparison:
    """Comparison of Completion vs Chat APIs."""
    feature: str  # the dimension being compared (e.g. "Input Format")
    completion_api: str  # how the Completion API handles this dimension
    chat_api: str  # how the Chat API handles this dimension
# Feature-by-feature comparison table rendered by print_comparison_table().
COMPARISONS = [
    APIComparison(
        feature="Input Format",
        completion_api="Single text prompt",
        chat_api="Array of messages with roles"
    ),
    APIComparison(
        feature="System Prompt",
        completion_api="Must be included in prompt text",
        chat_api="Dedicated system role"
    ),
    APIComparison(
        feature="Conversation History",
        completion_api="Manual management in prompt",
        chat_api="Built-in message array"
    ),
    APIComparison(
        feature="Models",
        completion_api="text-davinci-003, etc.",
        chat_api="gpt-35-turbo, gpt-4"
    ),
    APIComparison(
        feature="Cost Efficiency",
        completion_api="Higher (davinci pricing)",
        chat_api="Lower (turbo pricing)"
    ),
    APIComparison(
        feature="Best For",
        completion_api="Text completion, single-turn",
        chat_api="Conversations, instructions"
    )
]
def print_comparison_table(comparisons=None):
    """Print the API comparison as an aligned three-column table.

    Args:
        comparisons: Optional iterable of objects exposing ``feature``,
            ``completion_api`` and ``chat_api`` attributes. Defaults to
            the module-level ``COMPARISONS`` list.
    """
    if comparisons is None:
        comparisons = COMPARISONS
    header = f"{'Feature':<25} {'Completion API':<30} {'Chat API':<30}"
    print(header)
    # Derive the rule from the header so it always matches the full table
    # width (25 + 30 + 30 columns + 2 separating spaces = 87 chars);
    # the previous hard-coded 85 fell two characters short.
    print("-" * len(header))
    for comp in comparisons:
        print(f"{comp.feature:<25} {comp.completion_api:<30} {comp.chat_api:<30}")
Building a Unified Client
Create an abstraction that works with both APIs:
from abc import ABC, abstractmethod
from typing import List, Dict, Union, Optional
import openai
@dataclass
class Message:
    """A chat message."""
    role: str  # one of: "system", "user", "assistant"
    content: str  # the message text
@dataclass
class CompletionResponse:
    """Unified response format returned by both API clients."""
    content: str  # generated text (stripped of whitespace by CompletionClient)
    tokens_used: int  # total tokens reported by the service (prompt + completion)
    model: str  # model name reported in the API response
    finish_reason: str  # why generation stopped, as reported by the API
class LLMClient(ABC):
    """Abstract LLM client.

    Concrete subclasses wrap one Azure OpenAI API (Completion or Chat
    Completion) behind a single ``generate`` entry point so callers can
    switch APIs without changing their code.
    """
    @abstractmethod
    def generate(
        self,
        prompt: Union[str, List[Message]],
        max_tokens: int = 500,
        **kwargs
    ) -> CompletionResponse:
        """Generate a completion for *prompt*.

        *prompt* may be a bare string or a list of chat ``Message``s;
        extra keyword arguments pass through to the underlying API call.
        """
        pass
class CompletionClient(LLMClient):
    """LLM client backed by the legacy Completion API."""

    # Display labels used when flattening chat messages into a text prompt.
    _ROLE_LABELS = {"system": "System", "user": "Human", "assistant": "Assistant"}

    def __init__(self, deployment: str):
        self.deployment = deployment

    def generate(
        self,
        prompt: Union[str, List[Message]],
        max_tokens: int = 500,
        **kwargs
    ) -> CompletionResponse:
        """Generate text, first flattening chat-style messages into a prompt."""
        text_prompt = (
            self._messages_to_prompt(prompt) if isinstance(prompt, list) else prompt
        )
        raw = openai.Completion.create(
            engine=self.deployment,
            prompt=text_prompt,
            max_tokens=max_tokens,
            **kwargs
        )
        first_choice = raw.choices[0]
        return CompletionResponse(
            content=first_choice.text.strip(),
            tokens_used=raw.usage.total_tokens,
            model=raw.model,
            finish_reason=first_choice.finish_reason
        )

    def _messages_to_prompt(self, messages: List[Message]) -> str:
        """Render chat messages as a transcript ending in an open 'Assistant:' turn."""
        turns = []
        for message in messages:
            label = self._ROLE_LABELS.get(message.role)
            # Unknown roles are silently skipped, matching chat-role semantics.
            if label is not None:
                turns.append(f"{label}: {message.content}")
        turns.append("Assistant:")
        return "\n\n".join(turns)
class ChatClient(LLMClient):
    """LLM client backed by the Chat Completion API."""

    def __init__(self, deployment: str):
        self.deployment = deployment

    def generate(
        self,
        prompt: Union[str, List[Message]],
        max_tokens: int = 500,
        **kwargs
    ) -> CompletionResponse:
        """Generate a reply, wrapping a bare string as a single user message."""
        if isinstance(prompt, str):
            conversation = [Message(role="user", content=prompt)]
        else:
            conversation = prompt
        payload = [{"role": m.role, "content": m.content} for m in conversation]
        raw = openai.ChatCompletion.create(
            engine=self.deployment,
            messages=payload,
            max_tokens=max_tokens,
            **kwargs
        )
        top_choice = raw.choices[0]
        return CompletionResponse(
            content=top_choice.message.content,
            tokens_used=raw.usage.total_tokens,
            model=raw.model,
            finish_reason=top_choice.finish_reason
        )
class UnifiedClient:
    """Facade that routes generation requests to either API client."""

    def __init__(
        self,
        completion_deployment: Optional[str] = None,
        chat_deployment: Optional[str] = None,
        default_api: APIType = APIType.CHAT
    ):
        # A client is only constructed for deployments that were supplied;
        # the other slot stays None and is validated at call time.
        self.completion_client = (
            CompletionClient(completion_deployment) if completion_deployment else None
        )
        self.chat_client = ChatClient(chat_deployment) if chat_deployment else None
        self.default_api = default_api

    def generate(
        self,
        prompt: Union[str, List[Message]],
        api_type: Optional[APIType] = None,
        **kwargs
    ) -> CompletionResponse:
        """Generate using the requested API, falling back to the default."""
        chosen = api_type or self.default_api
        if chosen == APIType.COMPLETION:
            client = self.completion_client
            missing = "Completion deployment not configured"
        else:
            client = self.chat_client
            missing = "Chat deployment not configured"
        if client is None:
            raise ValueError(missing)
        return client.generate(prompt, **kwargs)
# Usage: configure both deployments; requests default to the chat API.
client = UnifiedClient(
    completion_deployment="text-davinci-003",
    chat_deployment="gpt-35-turbo",
    default_api=APIType.CHAT
)
# Use chat API (the configured default)
result = client.generate("What is Azure?")
# Explicitly opt into the completion API for a single call
result = client.generate(
    "Complete this sentence: Azure is",
    api_type=APIType.COMPLETION
)
When to Use Each API
class APISelector:
    """Help select the appropriate API."""

    # Strengths of each API, surfaced by explain_choice().
    USE_CASES = {
        APIType.COMPLETION: [
            "Text completion (finish a sentence/paragraph)",
            "Single-turn text generation",
            "Legacy applications using davinci",
            "Fill-in-the-blank tasks",
            "When you need fine-grained prompt control"
        ],
        APIType.CHAT: [
            "Conversational applications",
            "Multi-turn dialogues",
            "Instruction-following tasks",
            "When you need system prompts",
            "Cost-sensitive applications (cheaper)",
            "GPT-4 access (chat only)"
        ]
    }

    # Known task name -> recommended API.
    RECOMMENDATIONS = {
        "chatbot": APIType.CHAT,
        "code_generation": APIType.CHAT,
        "text_completion": APIType.COMPLETION,
        "summarization": APIType.CHAT,
        "translation": APIType.CHAT,
        "qa": APIType.CHAT,
        "creative_writing": APIType.CHAT,
        "autocomplete": APIType.COMPLETION,
        "classification": APIType.CHAT,
        "extraction": APIType.CHAT
    }

    # Keyword hints consulted (chat hints first) when no direct match exists.
    _CHAT_HINTS = ("chat", "conversation", "dialogue")
    _COMPLETION_HINTS = ("complete", "finish", "continue")

    @classmethod
    def recommend(cls, task: str) -> APIType:
        """Recommend an API for a free-text task description."""
        normalized = task.lower()
        direct = cls.RECOMMENDATIONS.get(normalized)
        if direct is not None:
            return direct
        if any(hint in normalized for hint in cls._CHAT_HINTS):
            return APIType.CHAT
        if any(hint in normalized for hint in cls._COMPLETION_HINTS):
            return APIType.COMPLETION
        # Chat is the more versatile (and cheaper) fallback.
        return APIType.CHAT

    @classmethod
    def explain_choice(cls, api_type: APIType) -> List[str]:
        """List the reasons to prefer the given API."""
        return cls.USE_CASES.get(api_type, [])
# Usage: "chatbot" is not a direct key, but the "chat" keyword hint matches.
api = APISelector.recommend("build a customer support chatbot")
print(f"Recommended: {api.value}")
print("Reasons:")
for reason in APISelector.explain_choice(api):
    print(f"  - {reason}")
Migrating from Completion to Chat
class MigrationHelper:
    """Help migrate from the Completion API to the Chat Completion API."""

    # Line prefixes that mark a speaker change in a legacy completion prompt.
    _USER_MARKERS = ("Human:", "User:", "Q:")
    _ASSISTANT_MARKERS = ("Assistant:", "AI:", "A:")

    @staticmethod
    def convert_prompt_to_messages(
        prompt: str,
        system_message: Optional[str] = None
    ) -> List[Dict[str, str]]:
        """Convert a completion prompt to chat messages.

        Lines starting with user/assistant markers begin a new message of
        that role; unmarked lines accumulate into the current message.
        ``System:`` lines are moved (and merged) to the front of the list.

        Args:
            prompt: The legacy free-form completion prompt.
            system_message: Optional system prompt to prepend.

        Returns:
            A list of ``{"role": ..., "content": ...}`` dicts suitable for
            the Chat Completion API.
        """
        messages: List[Dict[str, str]] = []
        if system_message:
            messages.append({
                "role": "system",
                "content": system_message
            })

        current_role = "user"  # unmarked leading text is treated as the user's
        current_content: List[str] = []

        def flush() -> None:
            # Emit accumulated content (if any) as one message of current_role.
            if current_content:
                messages.append({
                    "role": current_role,
                    "content": " ".join(current_content)
                })

        for raw_line in prompt.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith(MigrationHelper._USER_MARKERS):
                flush()
                current_role = "user"
                current_content = [line.split(":", 1)[-1].strip()]
            elif line.startswith(MigrationHelper._ASSISTANT_MARKERS):
                flush()
                current_role = "assistant"
                current_content = [line.split(":", 1)[-1].strip()]
            elif line.startswith("System:"):
                # System content always moves to the beginning of the list,
                # merging with an existing system message if present.
                system_content = line.split(":", 1)[-1].strip()
                if messages and messages[0]["role"] == "system":
                    messages[0]["content"] += " " + system_content
                else:
                    messages.insert(0, {"role": "system", "content": system_content})
            else:
                current_content.append(line)

        flush()  # emit whatever is left after the final line
        return messages

    @staticmethod
    def migrate_parameters(completion_params: dict) -> dict:
        """Migrate Completion API parameters to Chat API parameters.

        Copies parameters with identical semantics, maps davinci-class
        engines to ``gpt-35-turbo``, and converts ``prompt`` into a
        ``messages`` array via :meth:`convert_prompt_to_messages`.
        """
        # Parameters that carry over with identical meaning.
        passthrough = (
            "max_tokens", "temperature", "top_p",
            "frequency_penalty", "presence_penalty", "stop"
        )
        chat_params = {
            name: completion_params[name]
            for name in passthrough
            if name in completion_params
        }
        if "engine" in completion_params:
            engine = completion_params["engine"]
            # davinci-class completion models have no chat endpoint; swap in
            # the turbo chat model (adjust to the appropriate chat deployment).
            chat_params["engine"] = (
                "gpt-35-turbo" if "davinci" in engine.lower() else engine
            )
        if "prompt" in completion_params:
            chat_params["messages"] = MigrationHelper.convert_prompt_to_messages(
                completion_params["prompt"]
            )
        # Note: 'n', 'best_of', 'logprobs' work differently or aren't available.
        return chat_params
# Example migration
old_code = {
"engine": "text-davinci-003",
"prompt": """System: You are a helpful assistant.