From 5d00e8fdee47e1f62b1bb1600a3af4bd8f213349 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=AE=89=E8=BE=B0?= <anchen.spp@antgroup.com>
Date: Wed, 18 Mar 2026 19:44:48 +0800
Subject: [PATCH] feat: add OpenSandbox as secure code execution backend

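Integrate OpenSandbox SDK as an optional execution backend so that
Python, Shell and JavaScript code can run in isolated containers
instead of directly on the host machine. All other languages (Ruby,
HTML, AppleScript, R, PowerShell, React, Java) keep their existing
handlers and still run on the host. Enabled via --sandbox flag or
interpreter.sandbox_mode = True.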

New files:
- SandboxManager: owns sandbox lifecycle, streaming execution, contexts
- SandboxLanguage base + SandboxPython/Shell/JavaScript subclasses

Modified:
- Terminal: conditionally routes to sandbox language handlers
- OpenInterpreter: sandbox_mode, sandbox_api_key, sandbox_domain config
- CLI: --sandbox, --sandbox_api_key, --sandbox_domain arguments
- pyproject.toml: opensandbox optional dependency group

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../terminal/languages/sandbox/__init__.py    |  26 ++
 .../languages/sandbox/sandbox_language.py     |  33 +++
 .../languages/sandbox/sandbox_manager.py      | 239 ++++++++++++++++++
 .../core/computer/terminal/terminal.py        |  59 +++--
 interpreter/core/core.py                      |   8 +
 .../start_terminal_interface.py               |  22 ++
 pyproject.toml                                |   5 +
 7 files changed, 376 insertions(+), 16 deletions(-)
 create mode 100644 interpreter/core/computer/terminal/languages/sandbox/__init__.py
 create mode 100644 interpreter/core/computer/terminal/languages/sandbox/sandbox_language.py
 create mode 100644 interpreter/core/computer/terminal/languages/sandbox/sandbox_manager.py

diff --git a/interpreter/core/computer/terminal/languages/sandbox/__init__.py b/interpreter/core/computer/terminal/languages/sandbox/__init__.py
new file mode 100644
index 0000000000..fd45d31ee6
--- /dev/null
+++ b/interpreter/core/computer/terminal/languages/sandbox/__init__.py
@@ -0,0 +1,26 @@
+"""
+Sandbox-backed language implementations for OpenSandbox execution.
+"""
+
+from .sandbox_language import SandboxLanguage
+
+
+class SandboxPython(SandboxLanguage):  # Python handler used when sandbox mode is on
+    file_extension = "py"
+    name = "Python"
+    aliases = ["py"]
+    sandbox_lang = "python"  # passed to sandbox_manager.execute() by SandboxLanguage.run()
+
+
+class SandboxShell(SandboxLanguage):  # Shell handler used when sandbox mode is on
+    file_extension = "sh"
+    name = "Shell"
+    aliases = ["bash", "sh", "zsh"]
+    sandbox_lang = "bash"  # every alias above is submitted to the sandbox as bash
+
+
+class SandboxJavaScript(SandboxLanguage):  # JavaScript handler used when sandbox mode is on
+    file_extension = "js"
+    name = "JavaScript"
+    aliases = ["js"]
+    sandbox_lang = "javascript"  # passed to sandbox_manager.execute() by SandboxLanguage.run()
diff --git a/interpreter/core/computer/terminal/languages/sandbox/sandbox_language.py b/interpreter/core/computer/terminal/languages/sandbox/sandbox_language.py
new file mode 100644
index 0000000000..54de0b09ae
--- /dev/null
+++ b/interpreter/core/computer/terminal/languages/sandbox/sandbox_language.py
@@ -0,0 +1,33 @@
+"""
+SandboxLanguage - Base class for sandbox-backed language execution.
+
+Extends BaseLanguage and delegates all execution to a shared SandboxManager.
+"""
+
+from ...base_language import BaseLanguage
+
+
+class SandboxLanguage(BaseLanguage):
+    """
+    Base class for languages that execute code in an OpenSandbox container.
+
+    Subclasses set `name`, `aliases`, `file_extension`, and `sandbox_lang`;
+    all execution is delegated to the sandbox manager given at construction.
+    """
+
+    _is_sandbox_language = True  # Terminal checks this to pass the manager to __init__
+    sandbox_lang = None  # Override in subclasses: "python", "bash", "javascript"
+
+    def __init__(self, sandbox_manager):  # the manager is shared with the other handlers
+        self.sandbox_manager = sandbox_manager
+
+    def run(self, code):  # generator: re-yields every chunk from sandbox_manager.execute()
+        yield from self.sandbox_manager.execute(self.sandbox_lang, code)
+
+    def stop(self):  # forwards the interrupt request to the manager
+        self.sandbox_manager.stop()
+
+    def terminate(self):
+        # Intentionally a no-op: don't kill the whole sandbox from one language
+        # handler. Terminal.terminate() calls SandboxManager.terminate() itself.
+        pass
diff --git a/interpreter/core/computer/terminal/languages/sandbox/sandbox_manager.py b/interpreter/core/computer/terminal/languages/sandbox/sandbox_manager.py
new file mode 100644
index 0000000000..18936e2d2c
--- /dev/null
+++ b/interpreter/core/computer/terminal/languages/sandbox/sandbox_manager.py
@@ -0,0 +1,239 @@
+"""
+SandboxManager - Manages OpenSandbox lifecycle and code execution.
+
+Owns a single sandbox instance and CodeInterpreterSync, providing
+streaming code execution that yields LMC-format chunks.
+"""
+
+import os
+import queue
+import threading
+import traceback
+
+LANGUAGE_MAP = {  # accepted language name or alias (lower-cased) -> sandbox language
+    "python": "python",
+    "py": "python",
+    "shell": "bash",
+    "bash": "bash",
+    "sh": "bash",
+    "zsh": "bash",
+    "javascript": "javascript",
+    "js": "javascript",
+}
+
+# Image used when neither the `image` argument nor OPEN_SANDBOX_IMAGE is set
+DEFAULT_IMAGE = "opensandbox/code-interpreter:latest"
+
+
+class SandboxManager:
+    def __init__(self, api_key=None, domain=None, image=None, timeout_minutes=10):
+        """
+        Store connection settings; the sandbox itself is created on first use.
+
+        Falsy `api_key`, `domain` and `image` fall back to the environment
+        variables OPEN_SANDBOX_API_KEY, OPEN_SANDBOX_DOMAIN and
+        OPEN_SANDBOX_IMAGE (and `image` finally to DEFAULT_IMAGE).
+        """
+        env = os.environ
+        self.api_key = api_key or env.get("OPEN_SANDBOX_API_KEY")
+        self.domain = domain or env.get("OPEN_SANDBOX_DOMAIN")
+        self.image = image or env.get("OPEN_SANDBOX_IMAGE", DEFAULT_IMAGE)
+        self.timeout_minutes = timeout_minutes  # sandbox timeout, in minutes
+
+        self._sandbox = self._code_interpreter = self._current_execution_id = None
+        self._contexts = {}  # language_name -> CodeContextSync
+        self._lock = threading.Lock()
+
+    def _ensure_sandbox(self):
+        """
+        Lazily create the sandbox and code interpreter on first use.
+        Raises ImportError (SDK missing) or ValueError (API key/domain unset).
+        """
+        if self._sandbox is not None:
+            return
+
+        try:
+            from opensandbox.sync.sandbox import SandboxSync
+            from opensandbox.config import ConnectionConfig
+            from code_interpreter.sync.code_interpreter import CodeInterpreterSync
+        except ImportError as err:
+            raise ImportError(
+                "OpenSandbox packages are required for sandbox mode. "
+                "Install with: pip install opensandbox opensandbox-code-interpreter"
+            ) from err
+
+        if not self.api_key:
+            raise ValueError(
+                "OpenSandbox API key is required. "
+                "Set via --sandbox_api_key or OPEN_SANDBOX_API_KEY env var."
+            )
+        if not self.domain:
+            raise ValueError(
+                "OpenSandbox domain is required. "
+                "Set via --sandbox_domain or OPEN_SANDBOX_DOMAIN env var."
+            )
+
+        from datetime import timedelta
+        config = ConnectionConfig(api_key=self.api_key, domain=self.domain)
+        self._sandbox = SandboxSync.create(
+            self.image, connection_config=config, timeout=timedelta(minutes=self.timeout_minutes)
+        )
+        try:
+            self._code_interpreter = CodeInterpreterSync.create(sandbox=self._sandbox)
+        except Exception:
+            self.terminate()  # kill the orphaned sandbox so the next call starts clean
+            raise
+
+    def _get_context(self, language):
+        """Return (context, sandbox language) for `language`, creating the context on first use."""
+        target = LANGUAGE_MAP.get(language.lower())
+        if target is None:
+            raise ValueError(
+                f"Language '{language}' is not supported in sandbox mode. "
+                f"Supported: {list(LANGUAGE_MAP.keys())}"
+            )
+
+        if target in self._contexts:
+            return self._contexts[target], target
+        created = self._code_interpreter.codes.create_context(target)
+        self._contexts[target] = created
+        return created, target
+
+    def execute(self, language, code):
+        """
+        Execute code in the sandbox. Generator yielding LMC-format dicts.
+
+        Mirrors the streaming pattern from SubprocessLanguage.run(): the
+        codes.run() call is made on a worker thread, its handlers push console
+        chunks onto a queue, and this generator yields them as they arrive.
+
+        Setup and execution errors are not raised; their tracebacks are
+        yielded as console output instead, so the caller always gets a
+        normal stream of chunks.
+        """
+
+        def console(content):
+            # LMC console-output chunk; the only chunk shape this method emits
+            return {
+                "type": "console",
+                "format": "output",
+                "content": content,
+            }
+
+        # Any setup failure is reported to the caller as console output
+        try:
+            self._ensure_sandbox()
+            # the second item (the sandbox language name) is not needed here
+            context, _ = self._get_context(language)
+        except Exception:
+            yield console(traceback.format_exc())
+            return
+
+        from opensandbox.models.execd_sync import ExecutionHandlersSync
+
+        message_queue = queue.Queue()
+        done_event = threading.Event()
+        execution_result = [None]  # mutable container for thread result
+
+        def on_stdout(msg):
+            message_queue.put(console(msg.text))
+
+        def on_stderr(msg):
+            # stderr is surfaced as ordinary console output, like stdout
+            message_queue.put(console(msg.text))
+
+        def on_error(err):
+            tb = "\n".join(err.traceback) if err.traceback else ""
+            content = f"{err.name}: {err.value}"
+            if tb:
+                content = f"{tb}\n{content}"
+            message_queue.put(console(content))
+
+        def on_execution_complete(complete):
+            # May fire before codes.run() has returned to the worker thread
+            done_event.set()
+
+        handlers = ExecutionHandlersSync(
+            on_stdout=on_stdout,
+            on_stderr=on_stderr,
+            on_error=on_error,
+            on_execution_complete=on_execution_complete,
+        )
+
+        def run_in_thread():
+            try:
+                result = self._code_interpreter.codes.run(
+                    code,
+                    context=context,
+                    handlers=handlers,
+                )
+                execution_result[0] = result
+                # NOTE(review): the id is only recorded once run() has returned,
+                # so stop() has nothing to interrupt while code is still
+                # running. TODO confirm whether the SDK exposes the id earlier.
+                if result and result.id:
+                    self._current_execution_id = result.id
+            except Exception:
+                message_queue.put(console(traceback.format_exc()))
+            finally:
+                done_event.set()
+
+        # Daemon thread, so a run() call that never returns cannot block exit
+        thread = threading.Thread(target=run_in_thread, daemon=True)
+        thread.start()
+
+        try:
+            # Yield output as it arrives, same pattern as SubprocessLanguage
+            while True:
+                try:
+                    yield message_queue.get(timeout=0.3)
+                except queue.Empty:
+                    if done_event.is_set():
+                        break
+
+            # on_execution_complete can set done_event before codes.run() has
+            # returned. Give the worker a bounded moment to store its result,
+            # then drain anything queued meanwhile, so neither the final
+            # result nor a late traceback is dropped.
+            thread.join(timeout=5)
+            while True:
+                try:
+                    yield message_queue.get_nowait()
+                except queue.Empty:
+                    break
+
+            # If execution produced results (e.g. expression values), yield them
+            result = execution_result[0]
+            if result and result.result:
+                for r in result.result:
+                    if r.text:
+                        yield console(r.text)
+        finally:
+            # Runs on normal completion and when the consumer closes the
+            # generator early, so a stale id is never left behind.
+            self._current_execution_id = None
+
+    def stop(self):
+        """Best-effort interrupt of the execution recorded in _current_execution_id."""
+        interpreter, running_id = self._code_interpreter, self._current_execution_id
+        if running_id and interpreter:
+            try:
+                interpreter.codes.interrupt(running_id)
+            except Exception:
+                pass  # a failed interrupt is deliberately ignored
+
+    def terminate(self):
+        """Kill the sandbox and release all resources."""
+        sandbox = self._sandbox
+        if sandbox:
+            # Best effort: a failing kill() must not prevent close(), and
+            # neither failure may stop the state reset below.
+            for method_name in ("kill", "close"):
+                try:
+                    getattr(sandbox, method_name)()
+                except Exception:
+                    pass
+        self._sandbox = None
+        self._code_interpreter = None
+        self._contexts = {}
+        self._current_execution_id = None
diff --git a/interpreter/core/computer/terminal/terminal.py b/interpreter/core/computer/terminal/terminal.py
index b9f92582f6..073143e1d8 100644
--- a/interpreter/core/computer/terminal/terminal.py
+++ b/interpreter/core/computer/terminal/terminal.py
@@ -33,18 +33,42 @@
 class Terminal:
     def __init__(self, computer):
         self.computer = computer
         self.languages = [
             Ruby,
             Python,
             Shell,
             JavaScript,
             HTML,
             AppleScript,
             R,
             PowerShell,
             React,
             Java,
         ]
+        self._sandbox_manager = None
+
+        # NOTE(review): sandbox_mode is read once, when the Terminal is built.
+        # --sandbox (and a later `interpreter.sandbox_mode = True`) assign the
+        # attribute on an existing interpreter; if this Terminal already exists
+        # by then, code keeps running on the host. TODO confirm construction
+        # order and resolve the mode lazily if so.
+        if getattr(computer.interpreter, "sandbox_mode", False):
+            from .languages.sandbox import SandboxPython, SandboxShell, SandboxJavaScript
+            from .languages.sandbox.sandbox_manager import SandboxManager
+
+            self._sandbox_manager = SandboxManager(
+                api_key=getattr(computer.interpreter, "sandbox_api_key", None),
+                domain=getattr(computer.interpreter, "sandbox_domain", None),
+            )
+            # Only these three have sandbox handlers; every other language in
+            # the list keeps its local handler and still runs on the host.
+            sandboxed = {
+                Python: SandboxPython,
+                Shell: SandboxShell,
+                JavaScript: SandboxJavaScript,
+            }
+            self.languages = [sandboxed.get(lang, lang) for lang in self.languages]
+
         self._active_languages = {}
 
     def sudo_install(self, package):
@@ -88,7 +112,7 @@ def run(self, language, code, stream=False, display=False):
             else:
                 return [{"type": "console", "format": "output", "content": f"Failed to install package {package}."}]
 
-        if language == "python":
+        if language == "python" and not self._sandbox_manager:
             if (
                 self.computer.import_computer_api
                 and not self.computer._has_imported_computer_api
@@ -155,10 +179,10 @@ def run(self, language, code, stream=False, display=False):
 
     def _streaming_run(self, language, code, display=False):
         if language not in self._active_languages:
-            # Get the language. Pass in self.computer *if it takes a single argument*
-            # but pass in nothing if not. This makes custom languages easier to add / understand.
             lang_class = self.get_language(language)
-            if lang_class.__init__.__code__.co_argcount > 1:
+            if hasattr(lang_class, '_is_sandbox_language') and lang_class._is_sandbox_language:
+                self._active_languages[language] = lang_class(self._sandbox_manager)
+            elif lang_class.__init__.__code__.co_argcount > 1:
                 self._active_languages[language] = lang_class(self.computer)
             else:
                 self._active_languages[language] = lang_class()
@@ -205,3 +229,6 @@ def terminate(self):
             ):  # Not sure why this is None sometimes. We should look into this
                 language.terminate()
             del self._active_languages[language_name]
+
+        if self._sandbox_manager:
+            self._sandbox_manager.terminate()
diff --git a/interpreter/core/core.py b/interpreter/core/core.py
index b964b745f9..d9406f3de5 100644
--- a/interpreter/core/core.py
+++ b/interpreter/core/core.py
@@ -80,6 +80,9 @@ def __init__(
         multi_line=True,
         contribute_conversation=False,
         plain_text_display=False,
+        sandbox_mode=False,
+        sandbox_api_key=None,
+        sandbox_domain=None,
     ):
         # State
         self.messages = [] if messages is None else messages
@@ -101,6 +104,11 @@ def __init__(
         self.plain_text_display = plain_text_display
         self.highlight_active_line = True  # additional setting to toggle active line highlighting. Defaults to True
 
+        # Sandbox settings
+        self.sandbox_mode = sandbox_mode
+        self.sandbox_api_key = sandbox_api_key
+        self.sandbox_domain = sandbox_domain
+
         # Loop messages
         self.loop = loop
         self.loop_message = loop_message
diff --git a/interpreter/terminal_interface/start_terminal_interface.py b/interpreter/terminal_interface/start_terminal_interface.py
index 60dadc902a..28a800189d 100644
--- a/interpreter/terminal_interface/start_terminal_interface.py
+++ b/interpreter/terminal_interface/start_terminal_interface.py
@@ -245,6 +245,28 @@ def start_terminal_interface(interpreter):
             "help_text": "experimentally let Open Interpreter control your mouse and keyboard (shortcut for `interpreter --profile os`)",
             "type": bool,
         },
+        # Sandbox settings
+        {
+            "name": "sandbox",
+            "nickname": "sb",
+            "help_text": "run code in a secure OpenSandbox container instead of locally",
+            "type": bool,
+            "attribute": {"object": interpreter, "attr_name": "sandbox_mode"},
+        },
+        {
+            "name": "sandbox_api_key",
+            "nickname": "sbk",
+            "help_text": "API key for OpenSandbox (or set OPEN_SANDBOX_API_KEY env var)",
+            "type": str,
+            "attribute": {"object": interpreter, "attr_name": "sandbox_api_key"},
+        },
+        {
+            "name": "sandbox_domain",
+            "nickname": "sbd",
+            "help_text": "domain for OpenSandbox server (or set OPEN_SANDBOX_DOMAIN env var)",
+            "type": str,
+            "attribute": {"object": interpreter, "attr_name": "sandbox_domain"},
+        },
         # Special commands
         {
             "name": "reset_profile",
diff --git a/pyproject.toml b/pyproject.toml
index 7e9c403c20..49f8f540ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,10 @@ easyocr = { version = "^1.7.1", optional = true }
 # Optional [server] dependencies
 janus = { version = "^1.0.0", optional = true }
 
+# Optional [sandbox] dependencies
+opensandbox = { version = ">=0.1.0", optional = true }
+opensandbox-code-interpreter = { version = ">=0.1.0", optional = true }
+
 # Required dependencies
 python = ">=3.9,<3.13"
 setuptools = "*"
@@ -77,6 +81,7 @@ os = ["opencv-python", "pyautogui", "plyer", "pywinctl", "pytesseract", "sentenc
 safe = ["semgrep"]
 local = ["opencv-python", "pytesseract", "torch", "transformers", "einops", "torchvision", "easyocr"]
 server = ["fastapi", "janus", "uvicorn"]
+sandbox = ["opensandbox", "opensandbox-code-interpreter"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.10.1"