Back to Blog
5 min read

Structured Outputs: Reliable JSON from OpenAI Models

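OpenAI’s Structured Outputs feature constrains model responses to a JSON schema you supply, so the output can be parsed directly into typed objects. The main exception, a model refusal, is covered under Handling Refusals below. This is a game-changer for building reliable AI applications.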

Basic Structured Outputs

from openai import OpenAI
from pydantic import BaseModel
from typing import List, Optional

# Module-level client shared by the example functions in this article.
# Constructed with no explicit arguments (presumably credentials are picked
# up from the environment — confirm against the SDK documentation).
client = OpenAI()

# Define your schema using Pydantic
class ProductReview(BaseModel):
    # Target schema for extract_review(): passed as `response_format`, so
    # each field below is something the model is asked to fill in.
    product_name: str
    # Numeric score; the usage example prints it as "<rating>/5".
    rating: int
    # Positive and negative points, one string per point.
    pros: List[str]
    cons: List[str]
    summary: str
    # Whether the reviewer recommends the product.
    recommended: bool

def extract_review(text: str) -> ProductReview:
    """Turn free-form review text into a ``ProductReview``.

    The text is sent as the user message, preceded by a fixed system
    instruction, with ``ProductReview`` as the response format.

    Args:
        text: The raw review text to analyse.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    conversation = [
        {
            "role": "system",
            "content": "Extract product review information from the text.",
        },
        {"role": "user", "content": text},
    ]

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        response_format=ProductReview,
    )

    first_choice = completion.choices[0]
    return first_choice.message.parsed

# Usage: run the extractor on a sample review and print a few parsed fields.
review_text = """
Just got the new XPhone Pro and I'm impressed! The camera is
incredible - best photos I've ever taken on a phone. Battery
lasts all day. However, it's quite heavy and the price is steep
at $1200. Face unlock is fast but fails in low light. Overall,
I'd give it 4 out of 5 stars. Definitely recommend if budget
isn't a concern.
"""

# `review` is whatever extract_review() returns (the parsed message).
review = extract_review(review_text)
print(f"Product: {review.product_name}")
print(f"Rating: {review.rating}/5")
print(f"Recommended: {review.recommended}")

Complex Nested Structures

from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum

class Priority(str, Enum):
    # Closed set of priority labels used by Task.priority. Members are also
    # `str` instances, with the lowercase strings below as their values.
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"

class TaskStatus(str, Enum):
    # Closed set of workflow states used by SubTask.status. Members are also
    # `str` instances, with the snake_case strings below as their values.
    TODO = "todo"
    IN_PROGRESS = "in_progress"
    BLOCKED = "blocked"
    DONE = "done"

class SubTask(BaseModel):
    # One unit of work nested inside a Task (see Task.subtasks).
    title: str
    # Effort estimate for this subtask, in hours.
    estimated_hours: float
    # Current workflow state; restricted to the TaskStatus values.
    status: TaskStatus

class Task(BaseModel):
    # One task in a ProjectPlan, optionally broken down into SubTask items.
    title: str
    description: str
    # Restricted to the Priority enum values.
    priority: Priority
    # Effort estimate for the whole task, in hours.
    estimated_hours: float
    # The list defaults use `default_factory` rather than a literal `[]`:
    # a literal default is written into the generated JSON schema as a
    # `default` keyword, which strict Structured Outputs does not accept,
    # whereas a factory default is left out of the schema. Constructing a
    # Task without these arguments still yields empty lists, as before.
    dependencies: List[str] = Field(default_factory=list)
    subtasks: List[SubTask] = Field(default_factory=list)
    # Name of the person responsible, or None when unassigned.
    assignee: Optional[str] = None

class ProjectPlan(BaseModel):
    # Top-level schema returned by create_project_plan().
    project_name: str
    description: str
    # All tasks in the plan; each may carry its own subtasks.
    tasks: List[Task]
    # Overall effort estimate in hours, as reported by the model
    # (nothing in this file recomputes it from the individual tasks).
    total_estimated_hours: float
    # Ordered list of strings; the usage example joins it with " -> ".
    critical_path: List[str]

def create_project_plan(requirements: str) -> ProjectPlan:
    """Ask the model for a ``ProjectPlan`` covering the given requirements.

    Args:
        requirements: Free-form description of what has to be built; sent
            verbatim as the user message.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    # The prompt text (including its indentation) is sent to the model as-is.
    system_prompt = """
                You are a project manager. Create a detailed project plan
                from the given requirements. Include realistic time estimates
                and identify the critical path.
                """

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": requirements},
    ]

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        response_format=ProjectPlan,
    )

    first_choice = completion.choices[0]
    return first_choice.message.parsed

# Usage: generate a plan from a bulleted requirements list and print a summary.
plan = create_project_plan("""
Build a mobile app for food delivery:
- User registration and authentication
- Restaurant browsing and search
- Order placement and tracking
- Payment integration
- Push notifications
""")

print(f"Project: {plan.project_name}")
print(f"Total hours: {plan.total_estimated_hours}")
# critical_path is a list of strings; join it with arrows for display.
print(f"Critical path: {' -> '.join(plan.critical_path)}")

JSON Schema Mode (Without Pydantic)

def extract_with_schema(text: str, schema: dict) -> dict:
    """Extract data from ``text`` using a raw JSON schema (no Pydantic).

    Args:
        text: Source text, sent verbatim as the user message.
        schema: JSON Schema object, passed as the ``schema`` of a strict
            ``json_schema`` response format named ``"extraction"``.

    Returns:
        The first choice's message content decoded with ``json.loads``.

    Raises:
        ValueError: If the message carries no content (for example because
            the model refused), or if the content is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    import json

    response = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "Extract information according to the schema."},
            {"role": "user", "content": text}
        ],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "extraction",
                "strict": True,
                "schema": schema
            }
        }
    )

    message = response.choices[0].message

    # Without this guard a refusal reaches json.loads(None), which fails
    # with an opaque TypeError instead of saying what actually happened.
    if message.content is None:
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Model returned no JSON content (refusal: {refusal!r})")

    return json.loads(message.content)

# Define schema manually: a plain JSON Schema dict instead of a Pydantic model.
# Every property is listed in `required` and extra keys are disallowed
# (NOTE(review): strict mode presumably requires both — confirm in the API docs).
person_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "email": {"type": "string"},
        "skills": {
            "type": "array",
            "items": {"type": "string"}
        }
    },
    "required": ["name", "age", "email", "skills"],
    "additionalProperties": False
}

# `result` is the dict decoded by extract_with_schema(); it is not used further here.
result = extract_with_schema(
    "John is 32 years old, works as a developer. Contact: john@example.com. Knows Python, TypeScript, and Go.",
    person_schema
)

Handling Refusals

class AnalysisResult(BaseModel):
    # Schema used by safe_analysis() as its response format.
    findings: List[str]
    # Free-form string; no fixed set of levels is enforced by this schema.
    risk_level: str
    recommendations: List[str]

def safe_analysis(text: str) -> Optional[AnalysisResult]:
    """Run a security analysis, returning ``None`` if the model refuses.

    Args:
        text: Material to analyse; sent verbatim as the user message.

    Returns:
        The parsed ``AnalysisResult`` from the first choice, or ``None``
        when that message carries a refusal (the refusal text is printed).
    """
    conversation = [
        {"role": "system", "content": "Analyze the security implications."},
        {"role": "user", "content": text},
    ]

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        response_format=AnalysisResult,
    )

    reply = completion.choices[0].message

    # Happy path first: no refusal means the parsed payload is the answer.
    if not reply.refusal:
        return reply.parsed

    print(f"Model refused: {reply.refusal}")
    return None

Streaming Structured Outputs

from pydantic import BaseModel
from typing import List
import json

class StepByStepSolution(BaseModel):
    # Schema for stream_structured_response(); its JSON schema is generated
    # with model_json_schema() and sent as the response format.
    problem: str
    # Solution steps, one string per step.
    steps: List[str]
    final_answer: str
    # Numeric confidence value; no range is enforced by this schema.
    confidence: float

def stream_structured_response(prompt: str):
    """Stream a ``StepByStepSolution`` response and return it as a dict.

    Each content fragment is echoed to stdout as it arrives; once the
    stream ends, the accumulated text is decoded as JSON.

    Args:
        prompt: The problem statement, sent verbatim as the user message.

    Returns:
        The complete streamed JSON document decoded with ``json.loads``.

    Raises:
        json.JSONDecodeError: If the accumulated text is not valid JSON,
            for example when the stream produced no content.
    """
    # Strict mode requires `additionalProperties: false` on the object, and
    # Pydantic's generated schema omits it by default. Build a new dict
    # rather than mutating whatever model_json_schema() returned.
    strict_schema = {
        **StepByStepSolution.model_json_schema(),
        "additionalProperties": False,
    }

    response = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": "Solve problems step by step."},
            {"role": "user", "content": prompt}
        ],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "solution",
                "strict": True,
                "schema": strict_schema
            }
        },
        stream=True
    )

    fragments = []
    for chunk in response:
        # Skip chunks with no choices so that indexing [0] cannot raise.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            fragments.append(piece)
            # Flush per fragment so the partial JSON shows up immediately.
            print(piece, end="", flush=True)

    print()  # Newline
    return json.loads("".join(fragments))

Best Practices

from pydantic import BaseModel, Field, field_validator
from typing import List, Annotated

class WellDefinedSchema(BaseModel):
    """Use Field for better descriptions and constraints"""

    # Each field pairs its type with a Field(...) carrying a human-readable
    # description plus validation bounds.

    # Non-empty string of at most 100 characters.
    name: Annotated[str, Field(
        description="The full name of the person",
        min_length=1,
        max_length=100
    )]

    # Integer between 0 and 150 inclusive.
    age: Annotated[int, Field(
        description="Age in years",
        ge=0,
        le=150
    )]

    # On a list, max_length caps the number of items (here: five tags).
    tags: Annotated[List[str], Field(
        description="Relevant tags, max 5",
        max_length=5
    )]

    @field_validator('name')
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        # min_length=1 alone would accept a whitespace-only name, so reject
        # those explicitly and hand back the value with whitespace stripped.
        if not v.strip():
            raise ValueError('Name cannot be empty')
        return v.strip()

# Use descriptive schema names
class CustomerFeedbackAnalysis(BaseModel):
    """Analysis of customer feedback with sentiment and action items"""

    # Plain string: the allowed labels are named only in the description,
    # nothing in this schema enforces them.
    sentiment: Annotated[str, Field(
        description="Overall sentiment: positive, negative, neutral, or mixed"
    )]

    key_points: Annotated[List[str], Field(
        description="Main points raised by the customer"
    )]

    action_items: Annotated[List[str], Field(
        description="Suggested follow-up actions for support team"
    )]

    # Integer constrained to the inclusive range 1..5.
    urgency: Annotated[int, Field(
        description="Urgency level from 1 (low) to 5 (high)",
        ge=1,
        le=5
    )]

Error Handling

from openai import APIError
from pydantic import ValidationError

def robust_structured_extraction(text: str, schema_class):
    """Run a structured extraction and report failures as data.

    Args:
        text: Input text, sent verbatim as the only (user) message.
        schema_class: Pydantic model class used as the response format.

    Returns:
        A dict in one of two shapes:

        * ``{"success": True, "data": <parsed model>}`` on success.
        * ``{"error": <kind>, "message": <str>}`` on failure, where
          ``<kind>`` is ``"refusal"``, ``"validation"``, ``"length"``,
          ``"content_filter"`` or ``"api"``.
    """
    # Imported here so this snippet stays self-contained. These two errors
    # are raised by the SDK's parse helper when the response was cut off or
    # filtered, and they are not subclasses of APIError.
    from openai import ContentFilterFinishReasonError, LengthFinishReasonError

    # Keep the try body to the one call that can raise the handled errors.
    try:
        response = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "user", "content": text}
            ],
            response_format=schema_class
        )

    except ValidationError as e:
        return {"error": "validation", "message": str(e)}

    except LengthFinishReasonError as e:
        # Output hit the token limit, so the JSON is incomplete.
        return {"error": "length", "message": str(e)}

    except ContentFilterFinishReasonError as e:
        return {"error": "content_filter", "message": str(e)}

    except APIError as e:
        return {"error": "api", "message": str(e)}

    message = response.choices[0].message

    if message.refusal:
        return {"error": "refusal", "message": message.refusal}

    return {"success": True, "data": message.parsed}

Structured Outputs transform unreliable text generation into dependable data extraction. Use them whenever you need guaranteed JSON structure from your AI applications.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.