Back to Blog
5 min read

Advanced Reasoning Techniques on Azure OpenAI

Azure OpenAI continues to evolve, bringing advanced AI capabilities to enterprise environments with Azure’s security, compliance, and scale benefits. Today we explore techniques for improving AI reasoning in your applications.

Getting Started with Azure OpenAI

from openai import AzureOpenAI
import os

# Configure Azure OpenAI client
# Reads the key and endpoint from the environment; os.environ[...] raises
# KeyError at import time if AZURE_OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT
# is unset, which surfaces misconfiguration early.
client = AzureOpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-06-01",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"]
)

# GPT-4o deployment
# This is the Azure *deployment name* (passed as `model=` below), not
# necessarily the underlying model id.
GPT4O_DEPLOYMENT = "gpt-4o"

def call_gpt4o(prompt: str, max_tokens: int = 4096) -> dict:
    """Send a single-turn user prompt to the GPT-4o deployment.

    Returns a dict with the reply text ("content") and the request's
    total token count ("total_tokens").
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model=GPT4O_DEPLOYMENT,
        messages=messages,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return {
        "content": first_choice.message.content,
        "total_tokens": completion.usage.total_tokens,
    }

Improving Reasoning with Prompting

Chain-of-Thought Technique

def solve_with_reasoning(problem: str) -> str:
    """Ask GPT-4o to solve *problem* with chain-of-thought prompting.

    Wraps the problem in explicit step-by-step instructions and returns
    the model's full reasoning text.
    """
    prompt = f"""
    Solve the following problem step by step.

    Problem: {problem}

    Instructions:
    1. First, understand what is being asked
    2. Identify the key information and constraints
    3. Plan your approach
    4. Work through the solution carefully
    5. Verify your answer

    Show your complete reasoning process.
    """

    completion = client.chat.completions.create(
        model=GPT4O_DEPLOYMENT,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=4096,
    )
    return completion.choices[0].message.content

Self-Verification Pattern

def solve_and_verify(problem: str) -> dict:
    """Two-pass reasoning: produce a solution, then have the model check it.

    Returns a dict with the solution text, the verification text, and the
    combined token usage of both calls.
    """
    # Pass 1: generate a step-by-step solution.
    first = client.chat.completions.create(
        model=GPT4O_DEPLOYMENT,
        messages=[{
            "role": "user",
            "content": f"Solve step by step: {problem}"
        }],
        max_tokens=4096
    )
    solution = first.choices[0].message.content

    # Pass 2: feed the problem plus the proposed solution back for review.
    review_prompt = f"""
            Problem: {problem}

            Proposed solution:
            {solution}

            Please verify this solution:
            1. Check each step for errors
            2. Verify the final answer
            3. Note any issues found
            """
    second = client.chat.completions.create(
        model=GPT4O_DEPLOYMENT,
        messages=[{"role": "user", "content": review_prompt}],
        max_tokens=2048
    )

    tokens_used = first.usage.total_tokens + second.usage.total_tokens
    return {
        "solution": solution,
        "verification": second.choices[0].message.content,
        "total_tokens": tokens_used,
    }

Enterprise Patterns

Managed Identity Authentication

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from openai import AzureOpenAI

# Use managed identity for production
# Bearer tokens are acquired through DefaultAzureCredential for the
# Cognitive Services scope, so no API key has to be stored or rotated.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default"
)

# NOTE: rebinds the module-level `client` to token-based (Azure AD)
# auth, replacing the API-key client configured earlier in the file.
client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_ad_token_provider=token_provider,
    api_version="2024-06-01"
)

def secure_reasoning_call(prompt: str) -> str:
    """Issue a chat completion through the managed-identity client.

    Returns the model's reply text for a single user prompt.
    """
    reply = client.chat.completions.create(
        model=GPT4O_DEPLOYMENT,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=4096,
    )
    return reply.choices[0].message.content

Content Filtering Integration

def call_with_content_safety(prompt: str) -> dict:
    """Call GPT-4o and surface Azure content-filter outcomes explicitly.

    Azure applies content filtering automatically; this wrapper turns a
    filtered prompt or filtered response into a structured failure dict
    instead of letting callers see a raw exception or truncated reply.
    Non-filter exceptions are re-raised unchanged.
    """
    try:
        response = client.chat.completions.create(
            model=GPT4O_DEPLOYMENT,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=4096
        )
    except Exception as e:
        # The service rejects disallowed *prompts* by raising; detect the
        # filter marker in the error text, otherwise propagate.
        if "content_filter" not in str(e).lower():
            raise
        return {
            "success": False,
            "error": "Prompt filtered by content safety",
            "content": None
        }

    # Disallowed *responses* come back with a special finish reason.
    if response.choices[0].finish_reason == "content_filter":
        return {
            "success": False,
            "error": "Response filtered by content safety",
            "content": None
        }

    return {
        "success": True,
        "content": response.choices[0].message.content,
        "usage": response.usage.model_dump()
    }

Cost Management

from typing import Optional
import time

class AzureOpenAICostTracker:
    """Track cumulative Azure OpenAI spend against a monthly budget.

    Prices are USD per one million tokens (GPT-4o list pricing at time of
    writing); update the class constants when pricing changes.
    """

    # Current GPT-4o pricing (USD per 1M tokens)
    PRICE_PER_1M_INPUT = 2.50
    PRICE_PER_1M_OUTPUT = 10.00

    def __init__(self, monthly_budget: float):
        """
        Args:
            monthly_budget: Spending cap in USD for the current month.
        """
        self.monthly_budget = monthly_budget
        self.monthly_spend = 0.0
        self.request_count = 0

    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
        """Return the USD cost of one request's token usage."""
        input_cost = (input_tokens / 1_000_000) * self.PRICE_PER_1M_INPUT
        output_cost = (output_tokens / 1_000_000) * self.PRICE_PER_1M_OUTPUT
        return input_cost + output_cost

    def record_request(self, usage: dict) -> dict:
        """Accumulate one request's cost and report budget status.

        Args:
            usage: Mapping with "prompt_tokens" and "completion_tokens"
                keys, e.g. ``response.usage.model_dump()``.

        Returns:
            Dict with the request cost, running monthly spend, remaining
            budget, and percentage of the budget consumed.
        """
        cost = self.calculate_cost(
            usage["prompt_tokens"],
            usage["completion_tokens"]
        )

        self.monthly_spend += cost
        self.request_count += 1

        # Guard a zero (or non-positive) budget: report 100% used rather
        # than raising ZeroDivisionError.
        if self.monthly_budget > 0:
            pct_used = (self.monthly_spend / self.monthly_budget) * 100
        else:
            pct_used = 100.0 if self.monthly_spend > 0 else 0.0

        return {
            "request_cost": cost,
            "monthly_spend": self.monthly_spend,
            "budget_remaining": self.monthly_budget - self.monthly_spend,
            "budget_percentage_used": pct_used
        }

    def can_proceed(self, estimated_cost: float) -> bool:
        """Return True if spending *estimated_cost* more stays within budget."""
        return (self.monthly_spend + estimated_cost) <= self.monthly_budget

# Usage
# Example: track spend for a single call against a $1,000 monthly budget.
tracker = AzureOpenAICostTracker(monthly_budget=1000.0)

response = client.chat.completions.create(
    model=GPT4O_DEPLOYMENT,
    messages=[{"role": "user", "content": "Complex reasoning task..."}],
    max_tokens=8192
)

# model_dump() turns the usage object into the plain dict shape that
# record_request expects (prompt_tokens / completion_tokens keys).
cost_info = tracker.record_request(response.usage.model_dump())
print(f"Request cost: ${cost_info['request_cost']:.4f}")
print(f"Budget remaining: ${cost_info['budget_remaining']:.2f}")

Logging and Monitoring

import logging
from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry import trace

# Configure Azure Monitor
# Wires OpenTelemetry export to Application Insights; raises KeyError if
# APPLICATIONINSIGHTS_CONNECTION_STRING is not set in the environment.
configure_azure_monitor(
    connection_string=os.environ["APPLICATIONINSIGHTS_CONNECTION_STRING"]
)

# Module-level tracer and logger shared by traced calls in this file.
tracer = trace.get_tracer(__name__)
logger = logging.getLogger(__name__)

def traced_reasoning_call(prompt: str, operation_name: str) -> str:
    """Call GPT-4o inside an Azure Monitor trace span.

    Records the prompt length, model, latency, and token usage as span
    attributes and emits a structured completion log entry.

    Args:
        prompt: User message sent to the model.
        operation_name: Span name shown in Azure Monitor traces.

    Returns:
        The model's reply text.
    """
    with tracer.start_as_current_span(operation_name) as span:
        span.set_attribute("prompt_length", len(prompt))
        span.set_attribute("model", GPT4O_DEPLOYMENT)

        start_time = time.time()

        response = client.chat.completions.create(
            model=GPT4O_DEPLOYMENT,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=8192
        )

        duration = time.time() - start_time

        # Attach latency and usage metrics to the span.
        span.set_attribute("duration_seconds", duration)
        span.set_attribute("total_tokens", response.usage.total_tokens)

        # Fixed: was an f-string with no placeholders; pass a plain
        # string and keep structured fields in `extra` so the logging
        # exporter can index them.
        logger.info("GPT-4o call completed",
                    extra={
                        "operation": operation_name,
                        "duration": duration,
                        "tokens": response.usage.total_tokens
                    })

        return response.choices[0].message.content

Compliance Considerations

Azure OpenAI provides:

  • Data residency in selected regions
  • SOC 2, HIPAA, ISO compliance
  • Customer-managed keys option
  • Private endpoints support
  • Azure RBAC integration
# Private endpoint configuration example
# (Configure in Azure Portal or via ARM template)

# In code, just use the private endpoint URL
# NOTE: rebinds the module-level `client` to the private-link endpoint,
# using API-key auth here; the endpoint URL is hard-coded for the example.
client = AzureOpenAI(
    azure_endpoint="https://my-openai.privatelink.openai.azure.com/",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version="2024-06-01"
)

Best Practices

  1. Use managed identity in production environments
  2. Set up cost alerts in Azure Cost Management
  3. Enable diagnostic logging to Log Analytics
  4. Use private endpoints for sensitive workloads
  5. Implement retry logic with exponential backoff
  6. Monitor token usage to control costs

Looking Ahead

The AI landscape is evolving rapidly. OpenAI has hinted at improved reasoning capabilities in future models. Build your infrastructure to be flexible and ready to adopt new features as they become available on Azure.

Resources

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.