5 min read
GPT-3.5 on Azure: Understanding the Model and Its Capabilities
With Azure OpenAI Service approaching GA, let’s dive deep into GPT-3.5 - the model powering ChatGPT - and understand how to use it effectively in your applications.
Understanding GPT-3.5 Models
GPT-3.5 is not a single model but a family of models with different capabilities:
| Model | Best For | Max Tokens | Cost |
|---|---|---|---|
| text-davinci-003 | Complex tasks, longer output | 4,097 | $0.02/1K |
| text-curie-001 | Balanced performance | 2,049 | $0.002/1K |
| text-babbage-001 | Straightforward tasks | 2,049 | $0.0005/1K |
| text-ada-001 | Simple classification | 2,049 | $0.0004/1K |
Deploying GPT-3.5 on Azure
First, deploy your model using Azure CLI:
# Create the Azure OpenAI resource (kind OpenAI, SKU S0) in the target resource group
az cognitiveservices account create \
--name my-openai-resource \
--resource-group my-rg \
--kind OpenAI \
--sku S0 \
--location eastus
# Deploy the text-davinci-003 model into that resource under a named deployment
az cognitiveservices account deployment create \
--name my-openai-resource \
--resource-group my-rg \
--deployment-name gpt35-deployment \
--model-name text-davinci-003 \
--model-version "1" \
--model-format OpenAI \
--scale-settings-scale-type "Standard"
Working with GPT-3.5 in Python
Here’s a comprehensive example showing various GPT-3.5 capabilities:
import json
import os
from typing import Dict, List, Optional

import openai
class GPT35Client:
    """Client for GPT-3.5 text completions on Azure OpenAI.

    Connection settings are read from the environment at construction time:
    ``AZURE_OPENAI_ENDPOINT``, ``AZURE_OPENAI_KEY`` and (optionally)
    ``AZURE_OPENAI_DEPLOYMENT`` (defaults to ``"gpt35"``).
    """

    def __init__(self):
        # Configure the module-level openai client for Azure endpoints.
        openai.api_type = "azure"
        openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
        openai.api_version = "2022-12-01"
        openai.api_key = os.getenv("AZURE_OPENAI_KEY")
        self.deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt35")

    def complete(
        self,
        prompt: str,
        max_tokens: int = 500,
        temperature: float = 0.7,
        stop: Optional[List[str]] = None
    ) -> str:
        """Generate a completion and return its text, stripped of whitespace.

        Args:
            prompt: Text to complete.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature (lower = more deterministic).
            stop: Optional stop sequences that terminate generation.
        """
        response = openai.Completion.create(
            engine=self.deployment,
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=stop
        )
        return response.choices[0].text.strip()

    def summarize(self, text: str, max_length: int = 100) -> str:
        """Summarize ``text`` in approximately ``max_length`` words.

        max_tokens is set to 2x the word budget since a word is
        typically more than one token.
        """
        prompt = f"""Summarize the following text in approximately {max_length} words:
Text: {text}
Summary:"""
        return self.complete(prompt, max_tokens=max_length * 2)

    def extract_entities(self, text: str) -> Dict:
        """Extract named entities from ``text`` as a dict.

        Returns the model's JSON output parsed into a dict with keys
        persons, organizations, locations, dates (as requested in the
        prompt — the model is not guaranteed to comply).

        Raises:
            json.JSONDecodeError: if the model output is not valid JSON.
        """
        prompt = f"""Extract named entities from the following text.
Return as JSON with categories: persons, organizations, locations, dates.
Text: {text}
JSON:"""
        # Low temperature keeps the output format stable enough to parse.
        result = self.complete(prompt, temperature=0.1)
        return json.loads(result)

    def translate(self, text: str, target_language: str) -> str:
        """Translate ``text`` into ``target_language``."""
        prompt = f"""Translate the following text to {target_language}:
Text: {text}
Translation:"""
        return self.complete(prompt, temperature=0.3)

    def generate_code(self, description: str, language: str = "python") -> str:
        """Generate ``language`` source code from a natural-language description.

        The prompt opens a fenced code block and generation stops at the
        closing fence, so only code is returned.
        """
        prompt = f"""Write {language} code that does the following:
{description}
Only output the code, no explanations.
```{language}"""
        result = self.complete(prompt, temperature=0.2, stop=["```"])
        return result
# Usage examples — each call below performs a live request to the deployed model.
client = GPT35Client()
# Summarization: condense a paragraph (default budget ~100 words)
summary = client.summarize("""
Azure OpenAI Service provides REST API access to OpenAI's powerful language
models including GPT-3.5, Codex and DALL-E. These models can be easily adapted
to your specific task including content generation, summarization, semantic
search, and natural language to code translation.
""")
print(f"Summary: {summary}")
# Entity extraction: returns a dict parsed from the model's JSON output
entities = client.extract_entities(
    "Microsoft CEO Satya Nadella announced in Seattle on January 2023 "
    "that Azure OpenAI Service is now generally available."
)
print(f"Entities: {entities}")
# Code generation: fenced-block prompt, stops at the closing ```
code = client.generate_code(
    "Read a CSV file and calculate the average of a numeric column",
    language="python"
)
print(f"Generated code:\n{code}")
Temperature and Creativity Control
Temperature controls randomness in outputs:
def demonstrate_temperature(prompt: str):
    """Print three completions of ``prompt`` at each of three temperatures."""
    for temp in (0.0, 0.5, 1.0):
        print(f"\n--- Temperature: {temp} ---")
        response = openai.Completion.create(
            engine="gpt35",
            prompt=prompt,
            max_tokens=100,
            temperature=temp,
            n=3,  # three samples per temperature setting
        )
        for number, choice in enumerate(response.choices, start=1):
            snippet = choice.text.strip()[:100]
            print(f"Completion {number}: {snippet}...")


# Lower temperature = more deterministic
# Higher temperature = more creative/random
demonstrate_temperature("Write a tagline for a cloud computing company:")
Token Management
Understanding and managing tokens is crucial:
import tiktoken
def count_tokens(text: str, model: str = "text-davinci-003") -> int:
    """Return how many tokens ``text`` occupies under ``model``'s tokenizer."""
    return len(tiktoken.encoding_for_model(model).encode(text))
def optimize_prompt(
    prompt: str,
    max_prompt_tokens: int = 3000,
    model: str = "text-davinci-003",
) -> str:
    """Truncate ``prompt`` so it fits within ``max_prompt_tokens`` tokens.

    Args:
        prompt: Text to (possibly) truncate.
        max_prompt_tokens: Token budget for the prompt.
        model: Model whose tokenizer to use (previously hard-coded to
            text-davinci-003; now a parameter, consistent with count_tokens).

    Returns:
        ``prompt`` unchanged if it fits, otherwise the decoded truncation
        with a trailing "..." marker.
    """
    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(prompt)
    if len(tokens) <= max_prompt_tokens:
        return prompt
    # NOTE(review): the appended "..." adds a token or two beyond the budget;
    # keep a small margin when choosing max_prompt_tokens.
    truncated_tokens = tokens[:max_prompt_tokens]
    return encoding.decode(truncated_tokens) + "..."
def estimate_cost(prompt: str, max_tokens: int, model: str = "text-davinci-003") -> float:
    """Estimate the worst-case USD cost of a completion request.

    Args:
        prompt: The prompt that will be sent.
        max_tokens: Maximum completion tokens requested (billed up to this).
        model: Model name; selects both tokenizer and price per 1K tokens.

    Returns:
        Estimated cost in dollars, assuming the full max_tokens are generated.
    """
    # BUG FIX: pass `model` through — previously the prompt was always
    # tokenized with the default davinci tokenizer regardless of `model`.
    prompt_tokens = count_tokens(prompt, model)
    total_tokens = prompt_tokens + max_tokens
    # Pricing per 1K tokens
    pricing = {
        "text-davinci-003": 0.02,
        "text-curie-001": 0.002,
        "text-babbage-001": 0.0005,
        "text-ada-001": 0.0004
    }
    # Unknown models fall back to the davinci rate (conservative estimate).
    rate = pricing.get(model, 0.02)
    return (total_tokens / 1000) * rate
# Example: count tokens for a short prompt and estimate the cost of a
# 500-token completion at the default (davinci) rate.
prompt = "Explain quantum computing in simple terms:"
print(f"Token count: {count_tokens(prompt)}")
print(f"Estimated cost: ${estimate_cost(prompt, 500):.4f}")
Practical Use Cases
Document Classification
def classify_document(document: str, categories: List[str]) -> str:
    """Assign ``document`` to one of the given ``categories`` via GPT-3.5."""
    label_list = ", ".join(categories)
    prompt = (
        f"Classify the following document into one of these categories: {label_list}\n"
        f"Document: {document}\n"
        "Category:"
    )
    # Near-zero temperature keeps the label choice deterministic.
    completion = openai.Completion.create(
        engine="gpt35",
        prompt=prompt,
        max_tokens=20,
        temperature=0.1,
    )
    return completion.choices[0].text.strip()
# Usage: classify a finance-flavored snippet against four candidate labels.
result = classify_document(
    "The quarterly revenue exceeded expectations with a 15% YoY growth...",
    ["Finance", "Technology", "Healthcare", "Marketing"]
)
print(f"Classification: {result}")  # Output: Finance
SQL Query Generation
def generate_sql(question: str, schema: str) -> str:
    """Produce a SQL query answering ``question`` against ``schema``."""
    prompt = (
        "Given the following SQL schema:\n"
        f"{schema}\n"
        f"Write a SQL query to answer: {question}\n"
        "SQL Query:"
    )
    completion = openai.Completion.create(
        engine="gpt35",
        prompt=prompt,
        max_tokens=200,
        temperature=0.1,
        stop=[";"],  # halt at the statement terminator...
    )
    # ...then re-append it, since the stop sequence is not included in output.
    return completion.choices[0].text.strip() + ";"
# Usage
schema = """
CREATE TABLE orders (
order_id INT PRIMARY KEY,
customer_id INT,
order_date DATE,
total_amount DECIMAL(10,2)
);
CREATE TABLE customers (
customer_id INT PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(100)
);
"""
query = generate_sql(
"Find the top 5 customers by total order amount",
schema
)
print(query)
Best Practices
- Use the right model size: Start with smaller models and only upgrade if needed
- Set appropriate temperature: Lower for factual tasks, higher for creative
- Implement token counting: Prevent exceeding context limits
- Cache responses: For repeated queries, cache results
- Handle rate limits: Implement exponential backoff
What’s Next
GPT-3.5 is powerful, but the real game-changer is coming: ChatGPT-style conversations with the Chat Completion API. Stay tuned for my next post on ChatGPT integration patterns.