Skip to content
Back to Blog
1 min read

The Evolution of Tool Use in AI: From Functions to Computer Control

I wrote “The Evolution of Tool Use in AI: From Functions to Computer Control” to share practical, production-minded guidance on this topic.

The Tool Use Timeline

from dataclasses import dataclass
from typing import Dict, List, Any
from enum import Enum
from datetime import date

class ToolCapability(Enum):
    """Capability levels of AI tool use, listed from earliest to most advanced.

    Each member's value is a lowercase string identifier. The trailing
    comment on each member records the year the article associates with
    that capability ("Future" for the one that is still speculative).
    """
    TEXT_ONLY = "text_only"                    # 2022: Just text generation
    FUNCTION_CALLING = "function_calling"       # 2023: Structured function calls
    PARALLEL_TOOLS = "parallel_tools"           # 2024: Multiple simultaneous tools
    COMPUTER_USE = "computer_use"               # 2024: Full computer control
    AUTONOMOUS_AGENTS = "autonomous_agents"     # Future: Self-directed agents

@dataclass
class ToolEvolutionStage:
    """A stage in tool use evolution.

    Plain record type: every field is supplied by the caller and stored
    unchanged. The three list fields hold short human-readable phrases.
    """
    # Display name of the stage, e.g. "Function Calling".
    name: str
    # Calendar year the stage is attributed to.
    year: int
    # What a model can do at this stage.
    capabilities: List[str]
    # What a model still cannot do at this stage.
    limitations: List[str]
    # Representative applications of the stage.
    example_use_cases: List[str]

# Stages of tool use in the order the article presents them (2022 -> 2024).
# Each mapping below is expanded into one ToolEvolutionStage, so its keys must
# match the dataclass field names exactly.
EVOLUTION_TIMELINE = [
    ToolEvolutionStage(**stage_fields)
    for stage_fields in (
        {
            "name": "Text Generation",
            "year": 2022,
            "capabilities": [
                "Generate text responses",
                "Answer questions",
                "Write content",
            ],
            "limitations": [
                "No access to external data",
                "No real-time information",
                "Cannot take actions",
            ],
            "example_use_cases": [
                "Chatbots",
                "Content writing",
                "Q&A systems",
            ],
        },
        {
            "name": "Function Calling",
            "year": 2023,
            "capabilities": [
                "Call predefined functions",
                "Access external APIs",
                "Structured output generation",
                "Multi-turn tool use",
            ],
            "limitations": [
                "Functions must be predefined",
                "Limited to API capabilities",
                "No visual understanding",
            ],
            "example_use_cases": [
                "API integrations",
                "Data retrieval",
                "Action automation",
            ],
        },
        {
            "name": "Parallel Tools & Vision",
            "year": 2024,
            "capabilities": [
                "Multiple simultaneous tool calls",
                "Image understanding",
                "Strict schema enforcement",
                "Tool choice control",
            ],
            "limitations": [
                "Still limited to defined tools",
                "No real-time interaction",
                "Cannot observe results directly",
            ],
            "example_use_cases": [
                "Complex data pipelines",
                "Image analysis workflows",
                "Multi-service orchestration",
            ],
        },
        {
            "name": "Computer Use",
            "year": 2024,
            "capabilities": [
                "See screen content",
                "Control mouse and keyboard",
                "Interact with any application",
                "Observe action results",
            ],
            "limitations": [
                "Requires visual feedback loop",
                "Slower than direct API calls",
                "Safety concerns",
            ],
            "example_use_cases": [
                "Legacy system automation",
                "UI testing",
                "Desktop workflow automation",
            ],
        },
    )
]

Comparing Tool Paradigms

from openai import OpenAI
import anthropic

# Paradigm 1: Simple Function Calling (2023)
def simple_function_calling():
    """Show the original single-function calling pattern.

    Sends one user question to the chat completions endpoint together with
    a single ``get_weather`` function definition.

    Returns:
        The response object from ``chat.completions.create``, unmodified.
        This function does not execute any tool call the model requests;
        running the function and sending its result back is left to the
        developer.
    """
    # JSON Schema for the function's arguments: one required string.
    location_schema = {
        "type": "object",
        "properties": {
            "location": {"type": "string"}
        },
        "required": ["location"]
    }
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": location_schema
        }
    }
    question = {"role": "user", "content": "What's the weather in Seattle?"}

    # The model decides whether to request the function; nothing is executed here.
    return OpenAI().chat.completions.create(
        model="gpt-4",
        messages=[question],
        tools=[weather_tool]
    )

# Paradigm 2: Parallel Tools with Strict Mode (2024)
def parallel_strict_tools():
    """Show function calling with strict schemas and parallel tool calls.

    Declares two strict-mode functions (``get_weather`` and ``get_time``)
    and asks a question that needs both, with parallel tool calls enabled.

    Returns:
        The response object from ``chat.completions.create``, unmodified.
    """

    def strict_function(name, description, properties):
        # Both tools share the same shape: every declared property is
        # required and no undeclared property is allowed.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "strict": True,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": list(properties),
                    "additionalProperties": False
                }
            }
        }

    weather_tool = strict_function(
        "get_weather",
        "Get weather",
        {
            "location": {"type": "string"},
            "units": {"type": "string", "enum": ["celsius", "fahrenheit"]}
        },
    )
    time_tool = strict_function(
        "get_time",
        "Get current time",
        {
            "timezone": {"type": "string"}
        },
    )

    # NOTE(review): OpenAI's structured-outputs guidance warns that strict
    # schemas may not be honored when parallel tool calls are generated;
    # confirm against the current docs before relying on both together.
    return OpenAI().chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Weather and time in Tokyo?"}],
        tools=[weather_tool, time_tool],
        parallel_tool_calls=True  # Lets the model request both tools in one turn
    )

# Paradigm 3: Computer Use (2024)
def computer_use_paradigm():
    """Show the computer-control paradigm via the Anthropic beta API.

    Registers a single ``computer`` tool describing a 1920x1080 display and
    sends one natural-language instruction with the computer-use beta flag.

    Returns:
        The response object from ``beta.messages.create``, unmodified. No
        screen action is carried out by this function itself.
    """
    screen_tool = {
        "type": "computer_20241022",
        "name": "computer",
        "display_width_px": 1920,
        "display_height_px": 1080
    }
    instruction = {
        "role": "user",
        "content": "Open the weather app and check Seattle's weather"
    }

    # With this tool the model is meant to work through the UI rather than
    # through predefined functions.
    request = {
        "model": "claude-3-5-sonnet-20241022",
        "max_tokens": 4096,
        "tools": [screen_tool],
        "messages": [instruction],
        "betas": ["computer-use-2024-10-22"]
    }
    return anthropic.Anthropic().beta.messages.create(**request)

Hybrid Tool Strategies

class HybridToolAgent:
    """Agent that uses multiple tool paradigms.

    A task is first classified by a model as ``"api"``, ``"computer"`` or
    ``"hybrid"`` and then routed to OpenAI function calling, Anthropic
    computer control, or a per-subtask mix of the two.
    """

    # The only routing labels the rest of the class knows how to act on.
    _APPROACHES = ("api", "computer", "hybrid")

    def __init__(self):
        self.openai_client = OpenAI()
        self.anthropic_client = anthropic.Anthropic()
        self.api_tools = self._define_api_tools()

    def _define_api_tools(self) -> list:
        """Return the function-calling tool definitions offered on the API path.

        Uses the same ``get_weather`` schema as the simple function-calling
        example; extend the list to expose more functions.
        """
        return [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string"}
                    },
                    "required": ["location"]
                }
            }
        }]

    def execute_task(self, task: str) -> dict:
        """Execute task using best tool paradigm.

        Args:
            task: Natural-language description of the work to do.

        Returns:
            The result dict of whichever strategy was selected; its
            ``"approach"`` key is ``"api"``, ``"computer"`` or ``"hybrid"``.
        """
        approach = self._select_approach(task)

        if approach == "api":
            return self._use_api_tools(task)
        elif approach == "computer":
            return self._use_computer_control(task)
        else:
            return self._hybrid_approach(task)

    def _select_approach(self, task: str) -> str:
        """Determine the best tool approach for a task.

        Returns:
            One of ``"api"``, ``"computer"`` or ``"hybrid"``. A reply that
            is empty or is not one of those labels yields ``"hybrid"``,
            which is the branch ``execute_task`` takes for any unknown
            label anyway.
        """
        response = self.openai_client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": """Analyze this task and determine the best approach:
                    - "api": Task can be done with API calls (faster, more reliable)
                    - "computer": Task requires UI interaction (legacy apps, visual tasks)
                    - "hybrid": Task needs both approaches

                    Return just the approach name."""
                },
                {"role": "user", "content": task}
            ]
        )

        # The message content is optional in the SDK, and models often wrap
        # the label in quotes or add a trailing period; normalize both cases
        # so the equality checks in the callers actually match.
        content = response.choices[0].message.content
        label = (content or "").strip().strip("\"'`.").strip().lower()
        return label if label in self._APPROACHES else "hybrid"

    def _decompose_task(self, task: str) -> list:
        """Split a task into an ordered list of subtask descriptions.

        Asks the model for a JSON object of the form
        ``{"subtasks": [...]}``.

        Returns:
            The non-empty string entries of ``"subtasks"``. If the reply is
            missing, is not valid JSON, or contains no usable entries, the
            original task is returned as the single subtask so the hybrid
            path still does the work.
        """
        import json

        response = self.openai_client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Break the user's task into an ordered list of small, "
                        "self-contained subtasks. Return JSON of the form "
                        '{"subtasks": ["...", "..."]}.'
                    )
                },
                {"role": "user", "content": task}
            ],
            response_format={"type": "json_object"}
        )

        try:
            parsed = json.loads(response.choices[0].message.content or "")
        except json.JSONDecodeError:
            return [task]

        candidates = parsed.get("subtasks") if isinstance(parsed, dict) else None
        if not isinstance(candidates, list):
            return [task]

        subtasks = [
            item.strip()
            for item in candidates
            if isinstance(item, str) and item.strip()
        ]
        return subtasks or [task]

    def _use_api_tools(self, task: str) -> dict:
        """Use traditional API tools.

        Returns:
            Dict with ``"approach"`` (``"api"``), the raw OpenAI
            ``"response"`` and a ``"latency"`` label. Tool calls requested
            by the model are not executed here.
        """
        response = self.openai_client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": task}],
            tools=self.api_tools
        )

        return {
            "approach": "api",
            "response": response,
            "latency": "fast"
        }

    def _use_computer_control(self, task: str) -> dict:
        """Use computer control for UI tasks.

        Returns:
            Dict with ``"approach"`` (``"computer"``), the raw Anthropic
            ``"response"`` and a ``"latency"`` label.
        """
        tools = [{
            "type": "computer_20241022",
            "name": "computer",
            "display_width_px": 1920,
            "display_height_px": 1080
        }]

        response = self.anthropic_client.beta.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=4096,
            tools=tools,
            messages=[{"role": "user", "content": task}],
            betas=["computer-use-2024-10-22"]
        )

        return {
            "approach": "computer",
            "response": response,
            "latency": "slow"
        }

    def _hybrid_approach(self, task: str) -> dict:
        """Combine API and computer control.

        The task is decomposed into subtasks and each one is routed on its
        own.

        Returns:
            Dict with ``"approach"`` (``"hybrid"``) and
            ``"subtask_results"``, one result dict per subtask in order.
        """
        results = []
        for subtask in self._decompose_task(task):
            # Anything not classified as "api" (including a nested "hybrid")
            # goes to computer control, so decomposition never recurses.
            if self._select_approach(subtask) == "api":
                results.append(self._use_api_tools(subtask))
            else:
                results.append(self._use_computer_control(subtask))

        return {
            "approach": "hybrid",
            "subtask_results": results
        }

The Future: Autonomous Tool Creation

class FutureToolAgent:
    """Speculative: Agent that creates its own tools.

    Tool specifications are generated by a model and kept in
    ``dynamic_tools`` keyed by tool name. Generated implementation code is
    never executed by this class.
    """

    def __init__(self):
        self.client = OpenAI()
        self.dynamic_tools = {}

    @staticmethod
    def _tool_name(tool_spec):
        """Return the function name inside a generated spec, or None.

        Accepts both the flat schema (``{"name": ...}``) and the wrapped
        form (``{"type": "function", "function": {"name": ...}}``) under
        the spec's ``"tool_definition"`` key.
        """
        if not isinstance(tool_spec, dict):
            return None
        definition = tool_spec.get("tool_definition")
        if not isinstance(definition, dict):
            return None

        name = definition.get("name")
        wrapped = definition.get("function")
        if not name and isinstance(wrapped, dict):
            name = wrapped.get("name")
        return name if isinstance(name, str) and name else None

    def create_tool_for_task(self, task_description: str) -> dict:
        """Generate a new tool to accomplish a task.

        Args:
            task_description: What the tool should be able to do.

        Returns:
            The parsed JSON object produced by the model (requested keys:
            ``tool_definition``, ``implementation``, ``dependencies``), or
            an empty dict when the model returns no content.

        Side effects:
            When the spec carries a recognizable tool name, the spec is
            stored in ``dynamic_tools`` under that name so ``adapt_tool``
            can find it later.

        Raises:
            json.JSONDecodeError: If the model reply is not valid JSON.
        """
        import json

        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": """Design a tool to accomplish this task.
                    Return JSON with:
                    - tool_definition: OpenAI function schema
                    - implementation: Python code for the function
                    - dependencies: Required packages"""
                },
                {"role": "user", "content": task_description}
            ],
            response_format={"type": "json_object"}
        )

        # The message content is optional in the SDK; treat a missing reply
        # as an empty spec rather than crashing inside json.loads.
        tool_spec = json.loads(response.choices[0].message.content or "{}")

        # Register the spec; without this dynamic_tools stays empty and
        # adapt_tool can never find a tool to adapt.
        name = self._tool_name(tool_spec)
        if name is not None:
            self.dynamic_tools[name] = tool_spec

        # In the future, this could safely execute generated code
        # For now, this is speculative
        return tool_spec

    def adapt_tool(self, tool_name: str, error_message: str) -> dict:
        """Adapt an existing tool based on error feedback.

        Args:
            tool_name: Key of a spec previously stored in ``dynamic_tools``.
            error_message: The error observed when the tool was used.

        Returns:
            ``{"error": "Tool not found"}`` when ``tool_name`` is unknown;
            otherwise the parsed JSON object proposed by the model (empty
            dict if it returns no content). The stored spec is left
            untouched, so the caller decides whether to adopt the proposal.

        Raises:
            json.JSONDecodeError: If the model reply is not valid JSON.
        """
        # Imported here because this method needs json independently of
        # create_tool_for_task, whose import is local to that method.
        import json

        if tool_name not in self.dynamic_tools:
            return {"error": "Tool not found"}

        current_spec = self.dynamic_tools[tool_name]

        response = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": "Modify this tool to fix the error encountered."
                },
                {
                    "role": "user",
                    "content": f"""
                    Current tool: {json.dumps(current_spec)}
                    Error encountered: {error_message}

                    Provide updated tool specification.
                    """
                }
            ],
            response_format={"type": "json_object"}
        )

        return json.loads(response.choices[0].message.content or "{}")

The evolution of tool use in AI represents a fundamental shift from constrained function execution to general-purpose computer interaction. This trajectory points toward truly autonomous AI systems that can adapt and create their own capabilities.

## Takeaways

Add a concise, personal takeaway and recommended next steps here.

Michael John Peña

Michael John Peña

Senior Data Engineer based in Sydney. Writing about data, cloud, and technology.