September 13, 2024 1 min read

Structured Outputs: Reliable JSON from OpenAI Models

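OpenAI’s Structured Outputs feature ensures your model responses conform to a specific JSON schema that you supply. Because the shape of the response is fixed up front, your code no longer has to validate or repair model-generated JSON by hand, which makes it much easier to build reliable AI applications.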

Basic Structured Outputs

from openai import OpenAI
from pydantic import BaseModel
from typing import List, Optional

# Shared client object used by all of the example functions in this article.
client = OpenAI()

# Define your schema using Pydantic.
# `extract_review` passes this class as `response_format`, so these six fields
# are the keys the parsed result exposes as attributes.
class ProductReview(BaseModel):
    product_name: str
    # Presumably a star score: the usage example prints it as "N/5".
    # No range is enforced by this declaration.
    rating: int
    pros: List[str]
    cons: List[str]
    summary: str
    recommended: bool

def extract_review(text: str) -> ProductReview:
    """Turn free-form review text into a ``ProductReview``.

    Args:
        text: Raw review text; it is sent to the model as the user message.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    system_message = {
        "role": "system",
        "content": "Extract product review information from the text.",
    }
    user_message = {"role": "user", "content": text}

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[system_message, user_message],
        response_format=ProductReview,
    )

    first_choice = completion.choices[0]
    return first_choice.message.parsed

# Usage: run the extractor on a sample review and print three parsed fields.
review_text = """
Just got the new XPhone Pro and I'm impressed! The camera is
incredible - best photos I've ever taken on a phone. Battery
lasts all day. However, it's quite heavy and the price is steep
at $1200. Face unlock is fast but fails in low light. Overall,
I'd give it 4 out of 5 stars. Definitely recommend if budget
isn't a concern.
"""

# NOTE(review): `extract_review` returns `message.parsed` without checking it;
# confirm it cannot be None here before the attribute accesses below.
review = extract_review(review_text)
print(f"Product: {review.product_name}")
print(f"Rating: {review.rating}/5")
print(f"Recommended: {review.recommended}")

Complex Nested Structures

from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum

# Closed set of priority values used by `Task.priority`.
# The `str` mixin makes each member an actual string equal to its value.
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"

# Closed set of status values used by `SubTask.status`.
# The `str` mixin makes each member an actual string equal to its value.
class TaskStatus(str, Enum):
    TODO = "todo"
    IN_PROGRESS = "in_progress"
    BLOCKED = "blocked"
    DONE = "done"

# One entry of `Task.subtasks`; all three fields are required.
class SubTask(BaseModel):
    title: str
    estimated_hours: float
    status: TaskStatus  # restricted to the TaskStatus members

# One entry of `ProjectPlan.tasks`. The first four fields are required; the
# last three carry defaults and may be omitted when constructing a Task.
# NOTE(review): the non-None defaults below end up as `default` entries in the
# generated JSON schema. Confirm the strict Structured Outputs endpoint accepts
# that keyword; if it does not, these fields need to become required.
class Task(BaseModel):
    title: str
    description: str
    priority: Priority  # restricted to the Priority members
    estimated_hours: float
    dependencies: List[str] = []  # presumably titles of prerequisite tasks - TODO confirm
    subtasks: List[SubTask] = []
    assignee: Optional[str] = None

# Top-level model passed as `response_format` by `create_project_plan`.
# It nests Task, which in turn nests SubTask.
class ProjectPlan(BaseModel):
    project_name: str
    description: str
    tasks: List[Task]
    total_estimated_hours: float
    # The usage example joins these strings with " -> " for display;
    # presumably they are task titles in order - TODO confirm.
    critical_path: List[str]

def create_project_plan(requirements: str) -> ProjectPlan:
    """Ask the model for a ``ProjectPlan`` covering the given requirements.

    Args:
        requirements: Free-form requirements text; sent as the user message.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    # The prompt keeps its original leading whitespace so the request payload
    # is exactly what it was before.
    planner_instructions = """
                You are a project manager. Create a detailed project plan
                from the given requirements. Include realistic time estimates
                and identify the critical path.
                """
    conversation = [
        {"role": "system", "content": planner_instructions},
        {"role": "user", "content": requirements},
    ]

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        response_format=ProjectPlan,
    )

    return completion.choices[0].message.parsed

# Usage: generate a plan from a short bulleted requirements list, then print
# the project name, the total estimate, and the critical path joined by arrows.
plan = create_project_plan("""
Build a mobile app for food delivery:
- User registration and authentication
- Restaurant browsing and search
- Order placement and tracking
- Payment integration
- Push notifications
""")

# NOTE(review): `create_project_plan` returns `message.parsed` unchecked;
# confirm it cannot be None here before the attribute accesses below.
print(f"Project: {plan.project_name}")
print(f"Total hours: {plan.total_estimated_hours}")
print(f"Critical path: {' -> '.join(plan.critical_path)}")

JSON Schema Mode (Without Pydantic)

def extract_with_schema(text: str, schema: dict) -> dict:
    """Extract data from ``text`` using a raw JSON schema.

    Args:
        text: Input text; sent to the model as the user message.
        schema: JSON Schema object forwarded as the ``schema`` of a strict
            ``json_schema`` response format.

    Returns:
        The first choice's message content decoded from JSON.

    Raises:
        ValueError: If the model refused to answer or returned no content.
            ``json.JSONDecodeError`` (itself a ``ValueError``) is raised when
            the content is not valid JSON.
    """
    import json

    response = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "Extract information according to the schema."},
            {"role": "user", "content": text}
        ],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "extraction",
                "strict": True,
                "schema": schema
            }
        }
    )

    message = response.choices[0].message

    # A refusal is reported on `message.refusal` instead of `content`. Handing
    # the missing content to json.loads would only raise an opaque TypeError,
    # so surface the refusal text explicitly.
    if message.refusal:
        raise ValueError(f"Model refused to answer: {message.refusal}")
    if message.content is None:
        raise ValueError("Model returned no content to parse")

    return json.loads(message.content)

# Define schema manually: a flat object with four properties, every one of
# which is listed under "required", and with extra keys disallowed.
person_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "email": {"type": "string"},
        "skills": {
            "type": "array",
            "items": {"type": "string"}
        }
    },
    "required": ["name", "age", "email", "skills"],
    "additionalProperties": False
}

# Run the extraction on a one-sentence description; `result` holds the dict
# returned by `extract_with_schema`.
result = extract_with_schema(
    "John is 32 years old, works as a developer. Contact: john@example.com. Knows Python, TypeScript, and Go.",
    person_schema
)

Handling Refusals

# Result model passed as `response_format` by `safe_analysis`.
class AnalysisResult(BaseModel):
    findings: List[str]
    risk_level: str  # plain string; no fixed set of levels is declared here
    recommendations: List[str]

def safe_analysis(text: str) -> Optional[AnalysisResult]:
    """Run the security analysis prompt, tolerating a model refusal.

    Args:
        text: Content to analyze; sent to the model as the user message.

    Returns:
        The parsed ``AnalysisResult``, or ``None`` when the reply carries a
        refusal (the refusal text is printed in that case).
    """
    conversation = [
        {"role": "system", "content": "Analyze the security implications."},
        {"role": "user", "content": text},
    ]
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        response_format=AnalysisResult,
    )
    reply = completion.choices[0].message

    # Happy path first: no refusal means the parsed object is the answer.
    if not reply.refusal:
        return reply.parsed

    print(f"Model refused: {reply.refusal}")
    return None

Streaming Structured Outputs

from pydantic import BaseModel
from typing import List
import json

# Model whose generated JSON schema is sent by `stream_structured_response`.
# All four fields are required.
class StepByStepSolution(BaseModel):
    problem: str
    steps: List[str]
    final_answer: str
    confidence: float  # no range is enforced by this declaration

def stream_structured_response(prompt: str):
    """Stream a structured solution, echoing the JSON text as it arrives.

    Args:
        prompt: Problem statement; sent to the model as the user message.

    Returns:
        The complete streamed text decoded from JSON once the stream ends.

    Raises:
        json.JSONDecodeError: If the accumulated text is not valid JSON, for
            example when the stream produced no content.
    """
    # Strict mode requires the object schema to state
    # `additionalProperties: false`, which the schema generated by
    # `model_json_schema()` does not include on its own. Build a new dict so
    # the generated schema object itself is left untouched.
    solution_schema = {
        **StepByStepSolution.model_json_schema(),
        "additionalProperties": False,
    }

    response = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "Solve problems step by step."},
            {"role": "user", "content": prompt}
        ],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "solution",
                "strict": True,
                "schema": solution_schema
            }
        },
        stream=True
    )

    # Collect fragments in a list and join once at the end instead of growing
    # a string with `+=` on every chunk.
    fragments = []
    for chunk in response:
        # A chunk may arrive with an empty `choices` list; indexing it would
        # raise IndexError, so skip such chunks.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            fragments.append(piece)
            print(piece, end="", flush=True)

    print()  # Newline
    return json.loads("".join(fragments))

Best Practices

from pydantic import BaseModel, Field, field_validator
from typing import List, Annotated

# Example model that attaches a description and validation constraints to
# every field through `Annotated[..., Field(...)]`.
# NOTE(review): confirm that the length/range constraints below are accepted
# by the Structured Outputs endpoint before using this model as a
# `response_format`.
class WellDefinedSchema(BaseModel):
    """Use Field for better descriptions and constraints"""

    # 1 to 100 characters.
    name: Annotated[str, Field(
        description="The full name of the person",
        min_length=1,
        max_length=100
    )]

    # Integer between 0 and 150 inclusive.
    age: Annotated[int, Field(
        description="Age in years",
        ge=0,
        le=150
    )]

    # At most five list items.
    tags: Annotated[List[str], Field(
        description="Relevant tags, max 5",
        max_length=5
    )]

    # Rejects names that are empty after stripping whitespace and stores the
    # stripped value, so surrounding whitespace never reaches the model.
    @field_validator('name')
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        if not v.strip():
            raise ValueError('Name cannot be empty')
        return v.strip()

# Use descriptive schema names: the class name and docstring say what the
# model represents, and every field carries its own description.
class CustomerFeedbackAnalysis(BaseModel):
    """Analysis of customer feedback with sentiment and action items"""

    # Declared as a plain `str`: the four allowed values appear only in the
    # description text and are not enforced by the type.
    sentiment: Annotated[str, Field(
        description="Overall sentiment: positive, negative, neutral, or mixed"
    )]

    key_points: Annotated[List[str], Field(
        description="Main points raised by the customer"
    )]

    action_items: Annotated[List[str], Field(
        description="Suggested follow-up actions for support team"
    )]

    # Integer between 1 and 5 inclusive, enforced by `ge`/`le`.
    urgency: Annotated[int, Field(
        description="Urgency level from 1 (low) to 5 (high)",
        ge=1,
        le=5
    )]

Error Handling

from openai import APIError
from pydantic import ValidationError

def robust_structured_extraction(text: str, schema_class):
    """Run a structured extraction and report every outcome as a dict.

    Args:
        text: Input text; sent to the model as the only (user) message.
        schema_class: Pydantic model class passed as ``response_format``.

    Returns:
        ``{"success": True, "data": <parsed object>}`` on success, otherwise
        ``{"error": <kind>, "message": <text>}`` where ``<kind>`` is one of
        ``"refusal"``, ``"validation"``, ``"length"``, ``"content_filter"``
        or ``"api"``.
    """
    # Imported here so the function brings its own dependencies. Neither of
    # these classes derives from APIError, so without dedicated handlers a
    # truncated or content-filtered completion would escape as an uncaught
    # exception.
    from openai import ContentFilterFinishReasonError, LengthFinishReasonError

    # Keep the try body to the one call that can raise the handled errors.
    try:
        response = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "user", "content": text}
            ],
            response_format=schema_class
        )

    except ValidationError as e:
        return {"error": "validation", "message": str(e)}

    except LengthFinishReasonError as e:
        # The completion was cut off before the JSON was complete.
        return {"error": "length", "message": str(e)}

    except ContentFilterFinishReasonError as e:
        return {"error": "content_filter", "message": str(e)}

    except APIError as e:
        return {"error": "api", "message": str(e)}

    message = response.choices[0].message

    if message.refusal:
        return {"error": "refusal", "message": message.refusal}

    return {"success": True, "data": message.parsed}

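Structured Outputs transform unreliable text generation into dependable data extraction. Use them whenever you need guaranteed JSON structure from your AI applications, and keep handling refusals and API errors as shown in the sections above, since a response can still be refused or fail before any JSON is produced.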