Foundation Models on Azure: Understanding Large Language Models
This article shares practical, production-minded guidance on using foundation models, and large language models in particular, on Azure.
What Are Foundation Models?
Foundation models are pre-trained on massive datasets and can be:
- Used directly for various tasks
- Fine-tuned for specific domains
- Used as feature extractors
Examples include:
- Language: GPT-3, Codex
- Vision: CLIP, DALL-E 2
- Code: Codex (powers GitHub Copilot)
Foundation Models in Azure
Azure provides access to foundation models through multiple services:
Azure OpenAI Service
import os

import openai

# Azure OpenAI connection settings. The endpoint and key are read from the
# environment when set, so real credentials never need to be committed to
# source control; the placeholder literals remain as fallbacks for
# illustration only.
openai.api_type = "azure"
openai.api_base = os.environ.get(
    "AZURE_OPENAI_ENDPOINT", "https://your-resource.openai.azure.com/"
)
openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY", "your-api-key")
openai.api_version = "2022-06-01-preview"

# Use GPT-3 Davinci
# NOTE(review): with api_type "azure", `engine` presumably has to match the
# name of a deployment in your resource — confirm against your setup.
response = openai.Completion.create(
    engine="text-davinci-002",
    prompt="Explain the concept of foundation models in AI:",
    max_tokens=300,
    temperature=0.7,
)
print(response.choices[0].text)
Azure Machine Learning Model Catalog
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
# Connect to Azure ML. One credential object is shared by both clients below.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential,
    subscription_id="your-subscription",
    # The keyword is `resource_group_name`; a misspelled keyword would be
    # absorbed by the constructor's **kwargs and leave the group unset.
    resource_group_name="your-rg",
    workspace_name="your-workspace",
)

# Browse available models
# `models.list()` takes no `registry_name` argument: the registry is chosen
# when the client is constructed, so a registry-scoped client is used to
# enumerate the shared "azureml" catalog.
registry_client = MLClient(credential, registry_name="azureml")
models = registry_client.models.list()

for model in models:
    print(f"{model.name}: {model.description}")
Using Pre-trained Models
Text Generation with GPT-3
def generate_text(prompt: str, max_tokens: int = 200) -> str:
    """Return a GPT-3 completion for ``prompt`` with outer whitespace removed.

    A single request is sent to the ``text-davinci-002`` engine, passing
    ``max_tokens`` through, and the text of the first choice is returned.
    """
    request = {
        "engine": "text-davinci-002",
        "prompt": prompt,
        "max_tokens": max_tokens,
        # Sampling settings.
        "temperature": 0.7,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.text.strip()
# Example: ask the model to document a small function.
# NOTE(review): the body lines of the embedded sample carry no indentation;
# the literal is reproduced exactly as found — confirm whether that is intended.
code = """
def calculate_cosmos_ru(item_size_kb, reads_per_sec, writes_per_sec):
read_rus = reads_per_sec * (item_size_kb / 4) * 1
write_rus = writes_per_sec * (item_size_kb / 4) * 5
return int(read_rus + write_rus)
"""
documentation = generate_text(
    "Generate documentation for this function:\n\n" + code
)
print(documentation)
Text Embedding Models
import numpy as np
def get_embedding(text: str, model: str = "text-embedding-ada-002") -> list:
    """Fetch the embedding vector for ``text``.

    ``model`` is forwarded as the ``engine`` argument of
    ``openai.Embedding.create``; the ``embedding`` entry of the first item in
    the response's ``data`` is returned.
    """
    payload = openai.Embedding.create(engine=model, input=text)
    first_item = payload["data"][0]
    return first_item["embedding"]
def semantic_similarity(text1: str, text2: str) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Each text is embedded with ``get_embedding`` (one call per text).
    """
    vec_a = np.array(get_embedding(text1))
    vec_b = np.array(get_embedding(text2))

    # Cosine similarity: dot product divided by the product of the norms.
    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    return np.dot(vec_a, vec_b) / norm_product
# Example: compare two sentences and print the score to four decimals.
similarity = semantic_similarity(
    text1="Azure Cosmos DB is a globally distributed database",
    text2="Microsoft's NoSQL database service with global replication",
)
# High similarity expected
print("Similarity: {:.4f}".format(similarity))
Fine-tuning Foundation Models
Preparing Data
import json
def prepare_fine_tuning_data(examples: list) -> str:
    """Serialize examples as JSON Lines text for fine-tuning.

    Each example is a mapping with ``"input"`` and ``"output"`` string
    entries. The input becomes the ``prompt`` with a separator appended; the
    output becomes the ``completion``, prefixed with a single space and
    suffixed with the ``###`` stop marker. One JSON object is produced per
    example and the objects are joined with newlines (no trailing newline).
    """
    prompt_separator = "\n\n###\n\n"
    completion_prefix, stop_marker = " ", "###"

    records = [
        {
            "prompt": example["input"] + prompt_separator,
            "completion": completion_prefix + example["output"] + stop_marker,
        }
        for example in examples
    ]
    return "\n".join(map(json.dumps, records))
# Example: Customer service fine-tuning
training_examples = [
    {
        "input": "Customer: My order hasn't arrived yet.\nContext: Order #12345, shipped 3 days ago",
        "output": "I understand your concern about order #12345. Let me check the tracking status. It shows your package is currently in transit and should arrive within 1-2 business days.",
    },
    {
        "input": "Customer: How do I return this item?\nContext: Electronics purchase, within 30-day window",
        "output": "I'd be happy to help you with your return. Since your electronics purchase is within our 30-day return window, you can initiate a return through your account or I can start the process for you now.",
    },
]
training_data = prepare_fine_tuning_data(training_examples)

# Persist the JSONL text. The upload step reads "training_data.jsonl" from
# disk, so without this write the prepared data would never reach the service.
with open("training_data.jsonl", "w", encoding="utf-8") as jsonl_file:
    jsonl_file.write(training_data)
Fine-tuning with Azure OpenAI
# Upload training file
# The context manager closes the handle as soon as the upload call returns,
# instead of leaving it open for the rest of the process.
with open("training_data.jsonl", "rb") as training_fh:
    training_file = openai.File.create(
        file=training_fh,
        purpose="fine-tune",
    )

# Create fine-tuning job
fine_tune = openai.FineTune.create(
    training_file=training_file["id"],
    model="davinci",
    n_epochs=4,
    batch_size=4,
    learning_rate_multiplier=0.1,
)

# Monitor progress
print(f"Fine-tune job ID: {fine_tune['id']}")
Building Search Applications with Embeddings
Combine embeddings with vector search for powerful applications:
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
class SemanticSearchApplication:
    """Small wrapper around an Azure Cognitive Search index.

    Holds a ``SearchClient`` for one index and exposes a text search that
    returns plain dictionaries, plus a helper for embedding a query.
    """

    def __init__(self, search_endpoint: str, search_key: str, index_name: str):
        """Create the search client.

        Args:
            search_endpoint: URL of the search service.
            search_key: API key, wrapped in an ``AzureKeyCredential``.
            index_name: Name of the index to query.
        """
        self.search_client = SearchClient(
            endpoint=search_endpoint,
            index_name=index_name,
            credential=AzureKeyCredential(search_key),
        )

    def embed_query(self, query: str) -> list:
        """Get embedding for search query (delegates to ``get_embedding``)."""
        return get_embedding(query)

    def search(self, query: str, top_k: int = 5) -> list:
        """Search the index for ``query`` and return up to ``top_k`` hits.

        The query is sent to the service as ``search_text``. No embedding is
        requested here: an embedding that is computed but never passed to the
        search call only adds a billed API round trip without influencing the
        results. Call ``embed_query`` explicitly when a vector is needed.

        Args:
            query: Text to search for.
            top_k: Maximum number of results requested from the service.

        Returns:
            A list of dicts with ``title``, ``content``, ``url`` and ``score``
            (taken from each result's ``@search.score``).
        """
        results = self.search_client.search(
            search_text=query,
            top=top_k,
            select=["title", "content", "url"],
        )
        return [
            {
                "title": hit["title"],
                "content": hit["content"],
                "url": hit["url"],
                "score": hit["@search.score"],
            }
            for hit in results
        ]
# Usage: build the application, run one query, and list the hits.
search_app = SemanticSearchApplication(
    search_endpoint="https://your-search.search.windows.net",
    search_key="your-key",
    index_name="documents",
)
results = search_app.search(
    query="How do I configure Azure Cosmos DB for global distribution?"
)
# Each line shows a hit's title and its score to four decimal places.
for result in results:
    print("{}: {:.4f}".format(result["title"], result["score"]))
Vision Models
Image Analysis with Azure Computer Vision
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
def analyze_image(
    image_url: str,
    *,
    endpoint: str = "https://your-cv.cognitiveservices.azure.com",
    key: str = "your-key",
) -> dict:
    """Analyze image using Computer Vision.

    Requests a description and a tag list for the image, in two separate
    service calls.

    Args:
        image_url: URL of the image to analyze.
        endpoint: Computer Vision endpoint. Keyword-only; defaults to the
            placeholder value so existing callers are unaffected.
        key: Subscription key for the endpoint. Keyword-only; defaults to
            the placeholder value.

    Returns:
        A dict with ``description`` (text of the first caption, or ``None``
        when there are no captions), ``confidence`` (that caption's
        confidence, or ``0``) and ``tags`` (list of tag names).
    """
    cv_client = ComputerVisionClient(
        endpoint=endpoint,
        credentials=CognitiveServicesCredentials(key),
    )

    # Get image description
    description = cv_client.describe_image(image_url)

    # Get tags
    tags = cv_client.tag_image(image_url)

    # Resolve the first caption once so both fields come from the same object.
    top_caption = description.captions[0] if description.captions else None
    return {
        "description": top_caption.text if top_caption is not None else None,
        "confidence": top_caption.confidence if top_caption is not None else 0,
        "tags": [tag.name for tag in tags.tags],
    }
# Combine with GPT-3 for question answering about images
def answer_image_question(image_url: str, question: str) -> str:
    """Answer ``question`` about the image at ``image_url``.

    The image is first analyzed with ``analyze_image``; its description and
    tags are placed in a prompt together with the question, and the stripped
    text of the first ``text-davinci-002`` completion is returned.
    """
    analysis = analyze_image(image_url)

    # Assemble the prompt line by line; the lines are joined with newlines.
    prompt_lines = [
        "Based on this image analysis:",
        f"Description: {analysis['description']}",
        f"Tags: {', '.join(analysis['tags'])}",
        f"Answer this question: {question}",
        "Answer:",
    ]
    prompt = "\n".join(prompt_lines)

    completion = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=200,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.text.strip()
Cost Considerations
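Foundation models can be expensive to run. The client below illustrates three strategies for managing cost: caching responses to repeated prompts, choosing the cheapest model that suits the task, and tracking token usage to estimate spend: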
class CostOptimizedModelClient:
    """Completion client that caches responses and tracks token usage.

    Attributes are plain instance state: ``cache`` maps a request key to the
    completion text already returned for it, and ``token_usage`` accumulates
    the ``total_tokens`` reported by every request that reached the API.
    """

    def __init__(self):
        self.cache = {}
        self.token_usage = 0

    @staticmethod
    def _cache_key(prompt: str, kwargs: dict) -> tuple:
        """Build a cache key for a prompt and its request options.

        The key keeps the prompt itself rather than an integer hash of it, so
        two different requests can never be confused by a hash collision.
        Options are sorted by name so the key does not depend on the order in
        which keyword arguments were passed, and are stored as a ``repr``
        string so unhashable values such as lists are accepted.
        """
        return (prompt, repr(sorted(kwargs.items())))

    def call_with_cache(self, prompt: str, **kwargs) -> str:
        """Cache responses to avoid duplicate API calls.

        Args:
            prompt: Prompt text to complete.
            **kwargs: Forwarded unchanged to ``openai.Completion.create``.

        Returns:
            The text of the first choice, taken from the cache when the same
            prompt and options were requested before.
        """
        # NOTE: a cached answer is returned as-is, so repeated calls with a
        # sampling temperature above zero get the first sampled text again.
        cache_key = self._cache_key(prompt, kwargs)
        if cache_key in self.cache:
            return self.cache[cache_key]

        response = openai.Completion.create(
            prompt=prompt,
            **kwargs
        )
        # Only requests that reached the API count towards usage.
        self.token_usage += response['usage']['total_tokens']

        result = response.choices[0].text
        self.cache[cache_key] = result
        return result

    def choose_model(self, task_complexity: str) -> str:
        """Choose appropriate model based on task.

        Args:
            task_complexity: ``"simple"``, ``"medium"`` or ``"complex"``.

        Returns:
            The model name for that tier; any other value falls back to the
            mid-tier model.
        """
        model_map = {
            "simple": "text-ada-001",       # Cheapest
            "medium": "text-curie-001",     # Mid-tier
            "complex": "text-davinci-002",  # Most capable
        }
        return model_map.get(task_complexity, "text-curie-001")

    def get_cost_estimate(self, cost_per_1k_tokens: float = 0.02) -> float:
        """Estimate cost based on token usage.

        Args:
            cost_per_1k_tokens: Price per 1,000 tokens. Pricing varies by
                model, so callers can supply the rate for the model they
                use; the default keeps the previous approximate figure.

        Returns:
            ``token_usage / 1000`` multiplied by ``cost_per_1k_tokens``.
        """
        return (self.token_usage / 1000) * cost_per_1k_tokens
Conclusion
Foundation models are transforming AI development. Azure provides access to state-of-the-art models through Azure OpenAI Service, Azure Cognitive Services, and Azure ML, making it easier to build intelligent applications. Whether you use them directly, fine-tune for specific tasks, or build search systems with embeddings, these models enable capabilities that were impossible just a few years ago.