Coact-1_Implementation/Programmer.py at main · buiilding/Coact-1_Implementation · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""
Programmer module for CoAct-1 Multi-Agent System

This module contains the Programmer agent implementation, including:
- ProgrammerTools: Toolkit for code execution and system operations
- Programmer agent creation logic
"""

import logging
from typing import List, Dict, Any, Optional
from agent import ComputerAgent
from computer import Computer


class ProgrammerTools:
    """A toolkit for the Programmer agent that provides code and system-level tools.

    Every public method is a thin async wrapper that delegates to the wrapped
    computer object (mostly through its ``interface`` attribute), so the
    methods can be handed to an agent as callable tools.
    """

    def __init__(self, computer: "Computer"):
        self._computer = computer
        # Strong references to in-flight background launch tasks. The event
        # loop only keeps weak references to tasks, so a task nobody else
        # references may be garbage-collected before it has finished.
        self._background_tasks = set()

    @staticmethod
    def _format_result(result) -> str:
        """Render a command result as text: stdout always, stderr only if non-empty."""
        output = f"Stdout:\n{result.stdout}\n"
        if result.stderr:
            output += f"Stderr:\n{result.stderr}\n"
        return output

    async def run_command(self, command: str) -> str:
        """
        Runs a shell command and waits for output.
        Use this for commands where you need to see the results (ls, cat, grep, etc.).

        Args:
            command (str): The shell command to execute.

        Returns:
            str: The command's stdout (plus stderr when non-empty), or an
            error message if running the command raised an exception.
        """
        try:
            result = await self._computer.interface.run_command(command)
            return self._format_result(result)
        except Exception as e:
            # Errors are reported as text rather than raised.
            return f"Error running command '{command}': {e}"

    async def run_command_in_background(self, command: str) -> str:
        """
        Runs a shell command in the background without waiting for output.
        Use this for opening applications (firefox, chrome, xterm, etc.).

        The command is wrapped in ``setsid``, its output is redirected to
        ``/dev/null`` and it is backgrounded with ``&``. The launch runs as a
        separate asyncio task, so this method returns before it completes.

        Args:
            command (str): The shell command to execute.

        Returns:
            str: Confirmation that the command was started in background.
        """
        import asyncio

        # Run command in background with complete detachment
        background_command = f"setsid {command} >/dev/null 2>&1 &"

        async def run_background_command():
            try:
                await self._computer.interface.run_command(background_command)
            except Exception as exc:
                # Best-effort launch: nobody awaits this task, so a failure is
                # logged instead of being raised or silently dropped.
                logging.getLogger(__name__).warning(
                    "Background command %r failed to start: %s", command, exc
                )

        # Start the task but don't wait for it. Keep a reference until it is
        # done so it cannot be garbage-collected mid-execution.
        task = asyncio.create_task(run_background_command())
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

        # Return immediately - no output capture, no waiting
        return f"Command '{command}' started in background."

    async def list_dir(self, path: str) -> List[str]:
        """Lists the contents of a directory."""
        return await self._computer.interface.list_dir(path)

    async def read_file(self, path: str) -> str:
        """Reads the text content of a file."""
        return await self._computer.interface.read_text(path)

    async def write_file(self, path: str, content: str):
        """Writes text content to a file."""
        await self._computer.interface.write_text(path, content)

    async def venv_cmd(self, venv_name: str, command: str) -> str:
        """
        Execute a shell command in a virtual environment.

        Unlike ``run_command``, exceptions raised by the underlying call are
        not caught here and propagate to the caller.

        Args:
            venv_name: Name of the virtual environment.
            command: Shell command to execute.

        Returns:
            The stdout and stderr from the command execution.
        """
        result = await self._computer.venv_cmd(venv_name, command)
        return self._format_result(result)

    async def file_exists(self, path: str) -> bool:
        """Check if a file exists."""
        return await self._computer.interface.file_exists(path)

    async def directory_exists(self, path: str) -> bool:
        """Check if a directory exists."""
        return await self._computer.interface.directory_exists(path)

    async def read_bytes(self, path: str, offset: int = 0, length: Optional[int] = None) -> bytes:
        """Read binary content from a file, passing offset and length through to the interface."""
        return await self._computer.interface.read_bytes(path, offset, length)

    async def write_bytes(self, path: str, content: bytes) -> None:
        """Write binary content to a file."""
        await self._computer.interface.write_bytes(path, content)

    async def delete_file(self, path: str) -> None:
        """Delete a file."""
        await self._computer.interface.delete_file(path)

    async def create_dir(self, path: str) -> None:
        """Create a directory."""
        await self._computer.interface.create_dir(path)

    async def delete_dir(self, path: str) -> None:
        """Delete a directory."""
        await self._computer.interface.delete_dir(path)

    async def get_file_size(self, path: str) -> int:
        """Get the size of a file in bytes."""
        return await self._computer.interface.get_file_size(path)

    async def copy_to_clipboard(self) -> str:
        """Copy content from clipboard and return it as text."""
        return await self._computer.interface.copy_to_clipboard()

    async def set_clipboard(self, text: str) -> None:
        """Set clipboard content."""
        await self._computer.interface.set_clipboard(text)

    async def get_accessibility_tree(self) -> Dict:
        """Get accessibility tree for UI elements."""
        return await self._computer.interface.get_accessibility_tree()

    async def venv_install(self, venv_name: str, requirements: List[str]) -> str:
        """
        Install packages in a virtual environment.

        Args:
            venv_name: Name of the virtual environment.
            requirements: List of package names to install.

        Returns:
            A confirmation message naming the packages and the environment.
            Whatever the underlying install call returns is discarded.
        """
        await self._computer.venv_install(venv_name, requirements)
        return f"Installed packages {requirements} in virtual environment '{venv_name}'"


def create_programmer(programmer_model: str, programmer_tools: ProgrammerTools, screenshot_broadcast_callback=None, function_call_broadcast_callback=None) -> ComputerAgent:
    """Creates and configures the Programmer agent.

    Args:
        programmer_model: Model identifier passed to ``ComputerAgent``.
        programmer_tools: Toolkit instance whose bound methods become the
            agent's tools.
        screenshot_broadcast_callback: Optional callable; when truthy it is
            wrapped in a ``ScreenshotBroadcastCallback`` labelled
            "Programmer" and added to the agent's callbacks.
        function_call_broadcast_callback: Optional value forwarded unchanged
            to ``ComputerAgent``.

    Returns:
        The configured ``ComputerAgent``.

    Raises:
        OSError: If ``agent_prompts/Programmer.txt`` cannot be opened. The
            path is resolved relative to the current working directory.
    """
    # Read the system prompt inside a context manager so the file handle is
    # closed deterministically, and decode it as UTF-8 regardless of locale.
    with open("agent_prompts/Programmer.txt", "r", encoding="utf-8") as prompt_file:
        instructions = prompt_file.read()

    # Gather all methods from the toolkit instance
    programmer_tool_methods = [
        programmer_tools.run_command,
        programmer_tools.run_command_in_background,
        programmer_tools.list_dir,
        programmer_tools.read_file,
        programmer_tools.write_file,
        programmer_tools.venv_cmd,
        programmer_tools.file_exists,
        programmer_tools.directory_exists,
        programmer_tools.read_bytes,
        programmer_tools.write_bytes,
        programmer_tools.delete_file,
        programmer_tools.create_dir,
        programmer_tools.delete_dir,
        programmer_tools.get_file_size,
        programmer_tools.copy_to_clipboard,
        programmer_tools.set_clipboard,
        programmer_tools.get_accessibility_tree,
        programmer_tools.venv_install,
    ]

    # Prepare callbacks list
    callbacks = []
    if screenshot_broadcast_callback:
        # Imported lazily: only needed when screenshot broadcasting is enabled.
        from agent.callbacks import ScreenshotBroadcastCallback
        callbacks.append(ScreenshotBroadcastCallback(screenshot_broadcast_callback, "Programmer"))

    print(f"👨‍💻 [PROGRAMMER] Initializing with model: {programmer_model}")
    return ComputerAgent(
        model=programmer_model,
        tools=programmer_tool_methods,
        function_call_broadcast_callback=function_call_broadcast_callback,
        callbacks=callbacks,
        instructions=instructions,
        verbosity=logging.WARNING
    )