diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml index 523a763b62..0931be2c51 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -131,7 +131,7 @@ jobs: --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 - # Misc integration tests (Anthropic, Ollama, MCP) + # Misc integration tests (Anthropic, Hyperlight, Ollama, MCP) python-tests-misc-integration: name: Python Integration Tests - Misc runs-on: ubuntu-latest @@ -162,10 +162,11 @@ jobs: fallback_url: ${{ env.LOCAL_MCP_URL }} - name: Prefer local MCP URL when available run: echo "LOCAL_MCP_URL=${{ steps.local-mcp.outputs.effective_url }}" >> "$GITHUB_ENV" - - name: Test with pytest (Anthropic, Ollama, MCP integration) + - name: Test with pytest (Anthropic, Hyperlight, Ollama, MCP integration) run: > uv run pytest --import-mode=importlib packages/anthropic/tests + packages/hyperlight/tests packages/ollama/tests packages/core/tests/core/test_mcp.py -m integration diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml index 4fc47af595..ccc2966c7e 100644 --- a/.github/workflows/python-merge-tests.yml +++ b/.github/workflows/python-merge-tests.yml @@ -65,6 +65,7 @@ jobs: - 'python/samples/**/providers/azure/**' misc: - 'python/packages/anthropic/**' + - 'python/packages/hyperlight/**' - 'python/packages/ollama/**' - 'python/packages/core/agent_framework/_mcp.py' - 'python/packages/core/tests/core/test_mcp.py' @@ -278,10 +279,11 @@ jobs: fallback_url: ${{ env.LOCAL_MCP_URL }} - name: Prefer local MCP URL when available run: echo "LOCAL_MCP_URL=${{ steps.local-mcp.outputs.effective_url }}" >> "$GITHUB_ENV" - - name: Test with pytest (Anthropic, Ollama, MCP integration) + - name: Test with pytest (Anthropic, Hyperlight, Ollama, MCP integration) run: > uv run pytest --import-mode=importlib packages/anthropic/tests + 
packages/hyperlight/tests packages/ollama/tests packages/core/tests/core/test_mcp.py -m integration diff --git a/docs/decisions/0024-codeact-integration.md b/docs/decisions/0024-codeact-integration.md new file mode 100644 index 0000000000..fda4d3cb7d --- /dev/null +++ b/docs/decisions/0024-codeact-integration.md @@ -0,0 +1,227 @@ +--- +status: proposed +contact: eavanvalkenburg +date: 2026-04-07 +deciders: TBD +consulted: +informed: +--- + +# CodeAct integration through backend-specific context providers and an `execute_code` tool + +## Context and Problem Statement + +We need an architecture design that supports CodeAct in both Python and .NET. This is a necessary capability for the current generation of long-running agents, which need to plan, iterate, transform tool outputs, and execute bounded code inside a controlled runtime instead of pushing every intermediate step back through the model. The design should preserve the same behavioral contract across SDKs, but it does not need to use the same internal extension point in each runtime. We also want to standardize on Hyperlight as the initial backend, using the existing Python package and an anticipated .NET binding package once it is available. + +Throughout this ADR, **CodeAct** is the primary term. **Code mode** and **programmatic tool calling** refer to the same capability. This ADR uses **CodeAct** consistently. + +Model-generated code is treated as untrusted relative to the host process. This ADR assumes the selected backend provides the primary isolation boundary, while the framework is responsible for configuring approvals and capabilities, integrating telemetry, and translating outputs and failures into framework-native shapes. If a backend cannot provide isolation appropriate for its trust model, it is not a suitable CodeAct backend. 
+ +The core design question is: **where should CodeAct integrate into the agent pipeline so that both SDKs can offer the same functionality without invasive changes to their core function-calling loops?** + +## Decision Drivers + +- CodeAct must shape the model-facing surface before model invocation, not only after the model has already chosen tools. +- The design should let users control which tools are available through CodeAct and which remain regular tools only. +- The design must preserve existing session, approval, telemetry, and tool invocation behavior as much as possible. +- The design should define the minimum cross-SDK telemetry and failure semantics for `execute_code`, so Python and .NET do not diverge on basic observability or error handling. +- The design must fit naturally into the extension points that already exist in each SDK. +- The design must be safe for concurrent runs and must not rely on mutating shared agent configuration during invocation. +- The chosen structure should allow multiple backend-specific providers to fit under the same conceptual design over time, even though Hyperlight is the initial target. +- The abstraction should not assume that every backend is a VM-style sandbox; alternative execution models such as Pydantic's Monty should also fit. +- The design should allow `execute_code` to be reused both as a tool-enabled CodeAct runtime and as a standard code interpreter tool implementation. +- The design should remain open to alternative language/runtime modes, such as JavaScript on Hyperlight, rather than baking the abstraction to Python only. +- The design should provide a portable way to configure sandbox capabilities such as file access and network access, including allow-listed outbound domains. +- Using CodeAct should be optional, and installing its runtime or backend dependencies should also be optional. 
+- Backend-specific dependencies should be isolated behind a small adapter so SDK code is not tightly coupled to an unstable package surface. + +## Considered Options + +- **Option 1**: Standardize on context provider-based CodeAct with a shared cross-SDK contract and backend-specific public types +- **Option 2**: Implement CodeAct as a dedicated chat-client decorator/wrapper +- **Option 3**: Integrate CodeAct directly into the function invocation layer/FunctionInvokingChatClient + +## Pros and Cons of the Options + +### Option 1: Standardize on context provider-based CodeAct with a shared cross-SDK contract and backend-specific public types + +This option uses `ContextProvider` in Python and `AIContextProvider` in .NET, but standardizes the public concept and behavior. +In this option, the CodeAct tool set is provider-owned: only tools explicitly configured on the concrete CodeAct provider instance are available inside CodeAct, and the provider exposes direct CRUD-style management for tools, file mounts, and outbound network allow-list configuration rather than requiring a separate runtime setup object. +The agent's direct tool surface remains separate. If a tool should be available both through CodeAct and as a normal direct tool, it is configured in both places. + +- Good, because both SDKs already have first-class provider concepts intended for per-invocation context shaping. +- Good, because providers operate before model invocation, which is where CodeAct must add instructions and reshape tools. +- Good, because this lets us preserve existing function invocation behavior rather than rewriting it. +- Good, because slightly different internals are acceptable while the public behavior remains aligned. +- Good, because convenience builder/decorator helpers can still be added later on top of the provider model without changing the core design. 
+- Good, because backend-specific runtime logic can stay inside concrete provider implementations or internal helpers instead of being forced into a lowest-common-denominator public abstraction. +- Good, because the same provider structure can support either an all-or-nothing tool surface or a mixed side-by-side tool surface. +- Good, because users can keep some tools direct-only while allowing other tools to be used from inside CodeAct. +- Good, because a provider-owned CodeAct tool registry avoids mutating or inferring the agent's direct tool surface and can work consistently in both SDKs. +- Good, because the same conceptual design can remain open to `HyperlightCodeActProvider`, a future `MontyCodeActProvider`, and other backend-specific providers over time. +- Good, because `execute_code` can evolve into multiple backend-specific runtime modes rather than being hard-wired to one Python-plus-tools mode. +- Bad, because it is a bolt-on, which might make it less efficient at runtime. + +### Option 2: Implement CodeAct as a dedicated chat-client decorator/wrapper + +This option would introduce a CodeAct-specific chat-client decorator that injects instructions and tools directly into the chat request pipeline. + +- Good, because this is a natural fit for .NET's `DelegatingChatClient` pipeline. +- Good, because it can also support advanced custom chat-client stacks. +- Good, because backend-specific runtime selection could be hidden inside the decorator implementation. +- Good, because the decorator could also encapsulate mode-specific instruction shaping for tool-enabled versus standalone interpreter behavior. +- Good, because the decorator can decide per request whether the tool surface is exclusive or mixed. +- Bad, because Python can only support this by building a custom layering stack on top of a `Raw...Client` and swapping in a different `FunctionInvocationLayer`, and that composition path is more manual than the .NET `DelegatingChatClient` pipeline. 
+- Bad, because it duplicates responsibilities already handled by provider abstractions. +- Bad, because it makes CodeAct look more transport-specific than it really is. +- Bad, because swappable backends and reusable interpreter or language modes become coupled to chat-client composition rather than modeled as first-class CodeAct concepts. + +### Option 3: Integrate CodeAct directly into the function invocation layer/FunctionInvokingChatClient + +This option would push CodeAct into Python's `FunctionInvocationLayer` and .NET's `FunctionInvokingChatClient` or related middleware. + +- Good, because it is close to tool execution and can observe concrete tool invocation behavior. +- Good, because function middleware may still be useful later for auxiliary auditing or policy around sandbox-originated tool calls. +- Bad, because this is the wrong layer for constructing the model-facing tool surface and prompt instructions. +- Bad, because it does not naturally control whether the model sees an exclusive CodeAct tool surface or a mixed side-by-side tool surface. +- Bad, because it would still require a second mechanism for hiding normal tools and advertising `execute_code`. +- Bad, because it is a weak fit for standalone interpreter modes where no tool-calling loop is needed. +- Bad, because backend selection and CodeAct mode behavior are orthogonal concerns that do not belong in the function invocation layer. +- Bad, because .NET would become more tightly coupled to `FunctionInvokingChatClient`, which sits below the agent framework abstraction and is not the natural cross-SDK design seam. 
+ +## Approval Model Options + +- **Option A**: Bundled approval for the `execute_code` invocation +- **Option B**: Pre-execution inspection of `call_tool(...)` references before approving `execute_code` +- **Option C**: Nested per-tool approvals during `execute_code` + +## Pros and Cons of the Approval Options + +### Option A: Bundled approval for the `execute_code` invocation + +This option grants approval once, before `execute_code` starts. Provider-owned tool calls made from inside that execution run under the same approval. The effective approval of `execute_code` is determined up front from the provider configuration rather than from inspecting which tools are actually called during execution. + +- Good, because it is the simplest model to explain and implement consistently in both SDKs. +- Good, because it fits naturally with long-running CodeAct loops where repeated approval interruptions would be disruptive. +- Good, because it does not require static code analysis before execution begins. +- Good, because it keeps the first release focused on the provider integration rather than a more complex approval engine. +- Bad, because approval is coarse-grained and may cover more activity than the user expected. +- Bad, because it provides less visibility into which provider-owned tools or capabilities will be exercised during the run. + +### Option B: Pre-execution inspection of `call_tool(...)` references before approving `execute_code` + +This option inspects submitted code for statically discoverable `call_tool("tool_name", ...)` references before execution starts and uses that information to shape the approval request. + +- Good, because it can show users more detail up front while still keeping approval at a single pre-execution moment. +- Good, because it matches the common case where tool names are spelled out directly in the generated code. +- Good, because it can coexist with bundled approval as a more informative variant of the same UX. 
+- Bad, because the analysis is inherently best-effort and cannot reliably predict dynamic behavior. +- Bad, because it requires duplicated parsing or inspection logic that does not replace runtime enforcement. + +### Option C: Nested per-tool approvals during `execute_code` + +This option requests approval when sandboxed code actually attempts to invoke a provider-owned tool that requires approval. + +- Good, because it aligns approval with real behavior rather than predicted behavior. +- Good, because it gives precise visibility into which provider-owned tools are being used. +- Good, because it can allow some tool calls while rejecting others within the same execution. +- Bad, because it interrupts long-running CodeAct flows and can degrade the user experience significantly. +- Bad, because it requires more complex runtime plumbing and approval UX in both SDKs. +- Bad, because repeated approval pauses may make CodeAct less useful for the exact long-running scenarios that motivate this feature. + +## Decision Outcomes + +### Decision 1: Integration seam and public structure + +Chosen option: **Option 1: Standardize on context provider-based CodeAct with a shared cross-SDK contract and backend-specific public types**, because it is the only option that maps cleanly to both SDKs, lets us reshape instructions and tools before model invocation, and avoids invasive changes to the existing function invocation loops while still allowing multiple backend-specific providers and multiple runtime modes to fit under the same structure later. + +### Decision 2: Initial approval model + +Chosen option: **Option A: Bundled approval for the `execute_code` invocation**, because it is the smallest approval model that fits both SDKs, works well for long-running CodeAct flows, and does not force us to standardize a more complex inspection or policy engine in the first release. 
+ +This follows the spirit of the current Python tool approval flow, where `FunctionTool` uses `approval_mode="always_require" | "never_require"` and the auto-invocation loop escalates the whole batch when any called tool requires approval. + +### Design summary + +We standardize the **public concept** of CodeAct across SDKs while allowing each SDK to use the extension point that fits it best. + +- Python uses a `ContextProvider`. +- .NET uses an `AIContextProvider`. +- The term **CodeAct context provider** is used throughout this ADR as a design concept, not as a required public base type. Public SDK APIs should prefer concrete backend-specific types such as `HyperlightCodeActProvider` rather than a public abstract `CodeActContextProvider` or a public `CodeActExecutor` parameter. +- CodeAct support should ship as an optional package in each SDK rather than as part of the core package, so users who do not need CodeAct do not take on its installation and dependency footprint. That optional package may still depend on a few small, backward-compatible hooks in the host SDK's core agent pipeline. +- There is no separate runtime setup object in the chosen design. Concrete providers manage their provider-owned CodeAct tool registry, file mounts, and outbound network allow-list configuration directly through CRUD-style methods on the provider itself. +- At a high level, CodeAct is exposed through backend-specific context providers that contribute an `execute_code` tool, own the CodeAct-specific tool registry, and carry backend capability configuration such as filesystem and network access. +- The initial approval model is bundled approval for `execute_code`, using the same `approval_mode="always_require" | "never_require"` vocabulary as regular tools. +- The CodeAct provider exposes a default `approval_mode` for `execute_code`. If the provider default is `always_require`, `execute_code` is always treated as `always_require` regardless of the provider-owned tool registry. 
If the provider default is `never_require`, the effective approval for `execute_code` is derived from the provider-owned CodeAct tool registry captured for the run. +- If every provider-owned CodeAct tool in that registry has `approval_mode="never_require"`, `execute_code` is treated as `never_require`. If any provider-owned CodeAct tool in that registry has `approval_mode="always_require"`, `execute_code` is treated as `always_require`, even if the generated code may not end up calling that tool. +- Approval is granted before `execute_code` starts, and provider-owned tool calls made from inside that execution run under the same approval. +- Direct-only agent tools do not affect the approval of `execute_code`; only the provider-owned CodeAct tool registry participates in that calculation. +- This approval model is intentionally conservative. If one sensitive provider-owned tool forces `execute_code` to require approval more often than desired, the mitigation is to keep that tool direct-only or split it into a different provider/tool surface rather than trying to infer per-run tool usage up front. +- Configuring filesystem and network capability state on the provider, including adding file mounts or outbound network allow-list entries, is itself the approval for those capabilities in the initial model. +- Each `execute_code` invocation must start from a clean execution state; in-memory variables and other ephemeral interpreter/runtime state must not persist across separate calls. When a provider exposes a workspace, mounted files, or a writable artifact/output area, those files are the supported persistence mechanism across calls and are treated as external state rather than interpreter state. +- Mutating the provider's tool registry or capability configuration while a run is in flight is allowed, but it only affects subsequent runs. 
Provider implementations must snapshot the effective state for each run and synchronize concurrent access so shared provider instances remain safe across concurrent runs. +- The minimum cross-SDK telemetry contract is that `execute_code` is traced as a normal tool invocation nested inside the surrounding agent run, and provider-owned tool calls made from inside CodeAct continue to emit ordinary tool-invocation telemetry. Backend-specific resource metrics are optional extensions, not a required new top-level cross-SDK event model. +- Timeout, out-of-memory, backend crash, and similar sandbox failures are all execution failures of `execute_code` and should surface as structured error results rather than backend-specific public DTOs. Partial textual or file outputs may be returned only when the backend can report them unambiguously; callers must not rely on partial-output recovery as a portable guarantee. +- The provider-based structure preserves room for future pre-execution inspection and nested per-tool approvals if later experience shows they are needed. +- Concrete backend-specific providers may still use small SDK-local helpers or adapters internally, but that split is an implementation detail rather than a public API requirement. + +Detailed language-specific implementation notes are specified in: + +- [Python implementation](../features/code_act/python-implementation.md) +- [.NET implementation](../features/code_act/dotnet-implementation.md) + +### Minimal core hooks required by the optional package + +CodeAct remains optional at the package level, but the optional package depends on a small number of hooks that must live in the host SDK because the agent pipeline owns model invocation and per-run tool resolution. 
+ +- Python depends on the existing `ContextProvider` lifecycle, `SessionContext.extend_instructions(...)`, `SessionContext.extend_tools(...)`, per-run runtime tool access via `SessionContext.options["tools"]`, and the shared `ApprovalMode` vocabulary used by `FunctionTool`. +- .NET depends on the existing `AIContextProvider` seam, agent/runtime support for applying providers before model invocation, and the existing chat-client or function-invocation seams that concrete implementations use to contribute `execute_code`. + +These hooks are backward-compatible because they only expose or forward per-run state that core already owns. Behavior changes only when a concrete CodeAct provider opts in and uses them. + +### Concrete provider implementation contract + +The design does not require a public abstract `CodeActContextProvider` base class, but it does require a stable implementation contract for concrete providers. + +- Concrete providers should expose a standard capability surface at construction time, with SDK-appropriate naming for: + - approval mode + - workspace root + - file mounts + - allowed outbound targets plus any per-target method or policy restrictions needed by the backend +- Separate public `filesystem_mode` / `network_mode` flags are not required by the cross-SDK contract. Filesystem access may be disabled implicitly until a workspace or file mounts are configured, and outbound network may be disabled implicitly until an allow-list or equivalent outbound policy entry is configured. +- Concrete providers should expose direct CRUD-style methods for managing the provider-owned CodeAct tool registry, file mounts, and outbound network allow-list configuration, rather than requiring callers to construct a separate runtime setup object. 
+- Concrete providers should implement their host SDK's provider lifecycle hooks to: + - build CodeAct instructions, + - add `execute_code`, + - snapshot the effective CodeAct tool registry and capability settings for the run, + - compute the effective approval requirement for `execute_code`, + - configure file access and network access for the backend, + - prepare or restore execution state, + - execute code, + - and translate backend output into framework-native content. +- Any internal abstract/helper surface shared by multiple concrete providers should standardize responsibilities for: + - instruction construction, + - file-access configuration, + - network-access configuration, + - environment preparation/restoration, + - code execution, + - and output-to-content conversion. +- Backend execution output should reuse existing framework-native content/message primitives rather than introducing backend-specific public result DTOs. + +## More Information + +### Related artifacts + +- Python implementation: [`docs/features/code_act/python-implementation.md`](../features/code_act/python-implementation.md) +- .NET implementation: [`docs/features/code_act/dotnet-implementation.md`](../features/code_act/dotnet-implementation.md) +- Python provider/session APIs: [`python/packages/core/agent_framework/_sessions.py`](../../python/packages/core/agent_framework/_sessions.py) +- Python function invocation loop: [`python/packages/core/agent_framework/_tools.py`](../../python/packages/core/agent_framework/_tools.py) +- .NET context provider abstraction: [`dotnet/src/Microsoft.Agents.AI.Abstractions/AIContextProvider.cs`](../../dotnet/src/Microsoft.Agents.AI.Abstractions/AIContextProvider.cs) +- .NET agent integration for context providers: [`dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs`](../../dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs) +- Optional .NET chat-client provider decorator: 
[`dotnet/src/Microsoft.Agents.AI/AIContextProviderDecorators/AIContextProviderChatClient.cs`](../../dotnet/src/Microsoft.Agents.AI/AIContextProviderDecorators/AIContextProviderChatClient.cs) +- .NET function invocation middleware seam: [`dotnet/src/Microsoft.Agents.AI/FunctionInvocationDelegatingAgentBuilderExtensions.cs`](../../dotnet/src/Microsoft.Agents.AI/FunctionInvocationDelegatingAgentBuilderExtensions.cs) + +### Related decisions + +- [0015-agent-run-context](0015-agent-run-context.md) +- [0016-python-context-middleware](0016-python-context-middleware.md) diff --git a/docs/features/code_act/dotnet-implementation.md b/docs/features/code_act/dotnet-implementation.md new file mode 100644 index 0000000000..087e564d62 --- /dev/null +++ b/docs/features/code_act/dotnet-implementation.md @@ -0,0 +1,6 @@ +# CodeAct .NET implementation + +This document will describe the .NET realization of the CodeAct design in +[`docs/decisions/0024-codeact-integration.md`](../../decisions/0024-codeact-integration.md). + +Coming soon. diff --git a/docs/features/code_act/python-implementation.md b/docs/features/code_act/python-implementation.md new file mode 100644 index 0000000000..d5a4a3b018 --- /dev/null +++ b/docs/features/code_act/python-implementation.md @@ -0,0 +1,359 @@ +# CodeAct Python implementation + +This document describes the Python realization of the CodeAct design in +[`docs/decisions/0024-codeact-integration.md`](../../decisions/0024-codeact-integration.md). + +This document is intentionally focused on the Python design and public API surface. +The initial public Python type described here is `HyperlightCodeActProvider`. Future Python backends, such as Monty, should follow the same conceptual model with their own concrete provider types rather than through a public abstract base class or a public executor parameter. + +## What is the goal of this feature? + +Goals: +- Python developers can enable CodeAct through a `ContextProvider`-based integration. 
+- Developers can configure a provider-owned CodeAct tool set that is separate from the agent's direct `tools=` surface. +- Developers can use the same `execute_code` concept for both tool-enabled CodeAct and a standard code interpreter tool implementation. +- Developers can swap execution backends over time, starting with Hyperlight while keeping room for alternatives such as Pydantic's Monty. +- Developers can configure execution capabilities such as workspace mounts and outbound network allow lists in a portable way. + +Success Metric: +- Python samples exist for both a tool-enabled CodeAct mode and a standard interpreter mode. + +Implementation-free outcome: +- A Python developer can attach a backend-specific CodeAct provider, choose which tools are available inside CodeAct, and configure execution capabilities without rewriting the function invocation loop. + +## What is the problem being solved? + +- Today, the easiest way to prototype CodeAct is to infer or reshape the agent's direct tool surface, which is fragile and hard to reason about. +- In Python, this means deriving the CodeAct tool surface from generic agent tool configuration rather than from an explicit, CodeAct-specific declaration. +- There is no first-class Python design that simultaneously covers Hyperlight-backed CodeAct now, future backend-specific providers such as Monty, and both tool-enabled and interpreter modes. +- Sandbox capabilities such as mounted file access and outbound network access need a portable configuration model instead of ad hoc backend-specific wiring. +- Approval behavior needs to be explicit and configurable, especially when CodeAct and direct tool calling may both be available. + +## API Changes + +### CodeAct contract + +#### Terminology + +- **CodeAct** is the primary term. +- **Code mode**, **codemode**, and **programmatic tool calling** refer to the same concept in this document. +- `execute_code` is the model-facing tool name used by the initial Python providers in this spec. 
+ +#### Provider-owned CodeAct tool registry + +A concrete Python CodeAct provider owns the set of tools available through `call_tool(...)` inside CodeAct. + +Rules: +- Only tools explicitly configured on the concrete provider instance are available inside CodeAct. +- The provider must not infer its CodeAct-managed tool set from the agent's direct `tools=` configuration. +- Exclusive versus mixed behavior is achieved by where tools are configured, not by rewriting the agent's direct tool list. + +Implications: +- **CodeAct-only tool**: configured on the concrete CodeAct provider only. +- **Direct-only tool**: configured on the agent only. +- **Tool available both ways**: configured on both the agent and the concrete CodeAct provider. + +#### Managing tools and capabilities after provider construction + +There is no separate runtime setup object in the Python design. CodeAct tools, file mounts, and outbound network allow-list state are managed directly on the provider through CRUD-style registry methods. + +Preferred pattern: +- `add_tools(...) -> None` +- `get_tools() -> Sequence[ToolTypes]` +- `remove_tool(...) -> None` +- `clear_tools() -> None` +- `add_file_mounts(...) -> None` +- `get_file_mounts() -> Sequence[FileMount]` +- `remove_file_mount(...) -> None` +- `clear_file_mounts() -> None` +- `add_allowed_domains(...) -> None` +- `get_allowed_domains() -> Sequence[AllowedDomain]` +- `remove_allowed_domain(...) -> None` +- `clear_allowed_domains() -> None` + +Requirements: +- The provider-owned CodeAct tool registry is keyed by tool name. +- `add_tools(...)` adds new tools and replaces an existing provider-owned registration when the same tool name is added again. +- `get_tools()` returns the provider's current configured CodeAct tool registry. +- `remove_tool(...)` removes provider-owned CodeAct tools by name. +- `clear_tools()` removes all provider-owned CodeAct tools. +- File mounts are keyed by sandbox mount path. 
+- `add_file_mounts(...)` adds new file mounts and replaces an existing mount when the same mount path is added again. +- `get_file_mounts()` returns the provider's current configured file mounts. +- `remove_file_mount(...)` removes file mounts by mount path. +- `clear_file_mounts()` removes all configured file mounts. +- Allowed domains are keyed by normalized target string. +- `add_allowed_domains(...)` adds allow-list entries and replaces an existing entry when the same target is added again. +- `get_allowed_domains()` returns the current outbound allow-list entries. +- `remove_allowed_domain(...)` removes allow-list entries by target. +- `clear_allowed_domains()` removes all configured allow-list entries. +- Tool, file-mount, and network-allow-list mutations affect subsequent runs only; runs already in progress keep the snapshot captured at run start. +- The provider must snapshot its effective tool registry and capability state at the start of each run so concurrent execution remains deterministic. + +#### Approval model + +The initial Python design follows the ADR's initial approval decision and reuses the existing tool approval vocabulary from `agent_framework._tools`: + +- `approval_mode="always_require"` +- `approval_mode="never_require"` + +The provider exposes a default `approval_mode` for `execute_code`. + +Effective `execute_code` approval is computed as follows: + +- If the provider default is `always_require`, `execute_code` requires approval. +- If the provider default is `never_require`, the provider evaluates the provider-owned CodeAct tool registry snapshot for that run. +- If every provider-owned CodeAct tool in that snapshot is `never_require`, `execute_code` is `never_require`. +- If any provider-owned CodeAct tool in that snapshot is `always_require`, `execute_code` is `always_require`, even if the generated code may not call that tool. 
+- Provider-owned tool calls made through `call_tool(...)` during that execution run use the approval already determined for `execute_code`. +- Direct-only agent tools are excluded from this calculation. +- File and network capabilities do not create a separate runtime approval check in the initial model; configuring them on the provider, including adding file mounts or outbound network allow-list entries, is itself the approval for those capabilities. + +This is intentionally conservative and matches the shape of the current function-tool approval flow, where `FunctionTool` uses `always_require` / `never_require` and the auto-invocation loop escalates the whole batch if any called tool requires approval. + +If one sensitive provider-owned tool causes `execute_code` to require approval more often than desired, the mitigation is to keep that tool direct-only or expose it through a different CodeAct provider/tool surface. The initial model does not try to infer whether generated code will actually call that tool before approval. + +If the framework later standardizes pre-execution inspection or nested per-tool approvals, the Python provider surface can grow to expose that explicitly. The initial design does not assume that those extra modes are required. + +#### Shared execution flow + +On each run: +1. Resolve the provider's backend/runtime behavior, capabilities, provider default `approval_mode`, and provider-owned tool registry. +2. Compute the effective approval requirement for `execute_code` from the provider default plus the provider-owned tool registry snapshot. +3. Build provider-defined instructions. +4. Add `execute_code` to the model-facing tool surface. +5. Invoke the underlying model. +6. When `execute_code` is called, create or reuse an execution environment keyed by provider type, backend setup identity, capability configuration, and provider-owned tool signature. +7. 
If the current provider mode exposes host tools, expose `call_tool(...)` bound only to the provider-owned tool registry. +8. Execute code and convert results to framework-native content objects. + +Caching rules: +- Backends that support snapshots may cache a reusable clean snapshot. +- Backends that do not support snapshots may still cache warm initialization artifacts. +- No mutable per-run execution state may be shared across concurrent runs. +- In-memory interpreter state does not persist across separate `execute_code` calls. +- Configured workspace files, mounted files, and any writable artifact/output area are the supported persistence mechanism across calls when the backend exposes them. + +### Python public API + +#### Core types + +```python +class FileMount(NamedTuple): + host_path: str | Path + mount_path: str + +FileMountInput = str | tuple[str | Path, str] | FileMount + + +class AllowedDomain(NamedTuple): + target: str + methods: tuple[str, ...] | None = None + + +AllowedDomainInput = str | tuple[str, str | Sequence[str]] | AllowedDomain + + +class HyperlightCodeActProvider(ContextProvider): + def __init__( + self, + source_id: str = "hyperlight_codeact", + *, + backend: str = "wasm", + module: str | None = "python_guest.path", + module_path: str | None = None, + tools: ToolTypes | Sequence[ToolTypes] | None = None, + approval_mode: Literal["always_require", "never_require"] = "never_require", + workspace_root: str | Path | None = None, + file_mounts: Sequence[FileMountInput] = (), + allowed_domains: Sequence[AllowedDomainInput] = (), + ) -> None: ... + + def add_tools(self, tools: ToolTypes | Sequence[ToolTypes]) -> None: ... + def get_tools(self) -> Sequence[ToolTypes]: ... + def remove_tool(self, name: str) -> None: ... + def clear_tools(self) -> None: ... + def add_file_mounts(self, mounts: FileMountInput | Sequence[FileMountInput]) -> None: ... + def get_file_mounts(self) -> Sequence[FileMount]: ... + def remove_file_mount(self, mount_path: str) -> None: ... 
+ def clear_file_mounts(self) -> None: ... + def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None: ... + def get_allowed_domains(self) -> Sequence[AllowedDomain]: ... + def remove_allowed_domain(self, domain: str) -> None: ... + def clear_allowed_domains(self) -> None: ... +``` + +`file_mounts` accepts three input forms: +- `"data/report.csv"` uses the same relative path on the host and in the sandbox. +- `("fixtures/users.json", "data/users.json")` or `(Path("fixtures/users.json"), "data/users.json")` uses distinct host and sandbox paths. +- `FileMount(Path("fixtures/users.json"), "data/users.json")` is the named-tuple form of the explicit pair. + +`allowed_domains` accepts three input forms: +- `"github.com"` allows that target with all backend-supported methods. +- `("github.com", "GET")` or `("github.com", ["GET", "HEAD"])` uses an explicit per-target method list. +- `AllowedDomain("github.com", ("GET", "HEAD"))` is the named-tuple form of the explicit entry. + +No public abstract `CodeActContextProvider` base or public `executor=` parameter is required for the initial Python API. + +The initial alpha package also exports a standalone `HyperlightExecuteCodeTool` +for direct-tool scenarios where a provider is not needed. That standalone tool +should advertise `call_tool(...)`, the registered sandbox tools, and capability +state through its own `description` rather than requiring separate agent +instructions. + +Provider modes: +- If no CodeAct-managed tools are configured, `HyperlightCodeActProvider` uses interpreter-style behavior. +- If one or more CodeAct-managed tools are configured, `HyperlightCodeActProvider` uses tool-enabled behavior. + +#### Python provider implementation contract + +The concrete provider plugs into the existing Python `ContextProvider` surface from `agent_framework._sessions`. 
+ +The Hyperlight package also depends on a small set of core hooks that must remain available from `agent-framework-core`: +- `ContextProvider.before_run(...)` +- `SessionContext.extend_instructions(...)` +- `SessionContext.extend_tools(...)` +- per-run runtime tool access via `SessionContext.options["tools"]` +- the shared `ApprovalMode` vocabulary used by `FunctionTool` + +Required lifecycle hook: +- `before_run(*, agent, session, context, state) -> None` + +Optional lifecycle hook: +- `after_run(*, agent, session, context, state) -> None` + +`before_run(...)` is responsible for: +- snapshotting the current CodeAct-managed tool registry and capability settings for the run, +- computing the effective approval requirement for `execute_code` from the provider default and the snapshotted tool registry, +- adding a short CodeAct guidance block, +- adding `execute_code` to the run through `SessionContext.extend_tools(...)`, +- and wiring any backend-specific execution state needed for the run. + +If the provider stores anything in `state`, that value must stay JSON-serializable. + +Mutating the provider after `before_run(...)` has captured a run-scoped snapshot is allowed, but it affects subsequent runs only. Provider implementations should synchronize state capture and CRUD operations so shared provider instances remain safe across concurrent runs. + +`after_run(...)` is responsible for any backend-specific cleanup or post-processing that must happen after the model invocation completes. + +If shared internal helpers are introduced later for multiple concrete providers, they should standardize responsibilities for: +- building instructions, +- computing effective approval, +- configuring file access, +- configuring network access, +- preparing or restoring execution state, +- executing code, +- and converting backend output into framework-native `Content`. 
+ +#### Runtime behavior + +- `before_run(...)` adds a short CodeAct guidance block through `SessionContext.extend_instructions(...)`. +- `before_run(...)` adds `execute_code` through `SessionContext.extend_tools(...)`. +- The detailed `call_tool(...)`, sandbox-tool, and capability guidance is carried by `execute_code.description`. +- `execute_code` invokes the configured Hyperlight sandbox guest. +- If the current CodeAct tool registry is non-empty, the runtime injects `call_tool(...)` bound to the provider-owned tool registry. +- The provider does not inspect or mutate `Agent.default_options["tools"]` or `context.options["tools"]` to determine its CodeAct tool set. +- The provider snapshots the current CodeAct tool registry and capability state at run start, so later registry and allow-list mutations only affect future runs. +- Interpreter versus tool-enabled behavior is derived from the concrete provider and the presence of CodeAct-managed tools, not from a separate public profile object. +- `execute_code` should be traced like a normal tool invocation within the surrounding agent run, and provider-owned tool calls executed through `call_tool(...)` should continue to emit ordinary tool invocation telemetry. + +#### Backend integration + +Initial public provider: +- `HyperlightCodeActProvider` + +Backend-specific notes: +- **Hyperlight** + - Provider construction needs a guest artifact via `module`, which may be a packaged guest module name or a path to a compiled guest artifact. + - File access maps naturally to Hyperlight Sandbox's read-only `/input` and writable `/output` capability model. + - Network access is denied by default and is enabled through per-target allow-list entries. +- **Monty** + - A future `MontyCodeActProvider` should be a separate public type rather than a `HyperlightCodeActProvider` mode. + - Monty does not expose built-in filesystem or network access directly inside the interpreter. 
+ - File and URL access are mediated through host-provided external functions, so a Monty provider would need to translate provider settings into virtual files and allow-checked callbacks. + - Monty setup may also include backend-specific inputs such as `script_name`, optional type-check stubs, or restored snapshots. + +#### Capability handling + +Capabilities are first-class `HyperlightCodeActProvider` init parameters and provider-managed CRUD surfaces: +- `workspace_root` +- `file_mounts` +- `allowed_domains` + +Concrete providers should normalize these settings internally. Hyperlight can map them directly to sandbox capabilities, while Monty must enforce them through host-mediated file and network functions and may apply stricter URL-level checks than the public provider surface expresses. + +Expected management split: +- `workspace_root` remains a direct configuration value on the provider, +- file mounts are managed through provider CRUD methods, +- outbound allow-list entries are managed through provider CRUD methods. + +Enabling access means: +- Configuring `workspace_root` or any `file_mounts` enables the sandbox filesystem surface exposed through `/input` and `/output`. +- Leaving both `workspace_root` and `file_mounts` unset means no filesystem surface is configured. +- Adding any `allowed_domains` entry enables outbound access only for the configured targets; leaving it empty means network access is disabled without a separate `network_mode` flag. +- A string target allows all backend-supported methods for that target; an explicit tuple or `AllowedDomain` entry narrows the methods for that target. + +Backends may implement stricter semantics than these top-level settings. For example, Hyperlight naturally maps file access to `/input` and `/output`, while Monty would enforce equivalent policy through host-provided callbacks rather than direct interpreter I/O. 
+ +#### Execution output representation + +Backend execution output should be translated into existing AF `Content` values rather than a custom `CodeActExecutionResult` type. + +Use the existing content model from `agent_framework._types`, for example: +- `Content.from_code_interpreter_tool_result(outputs=[...])` to surface the overall result of sandboxed code execution, +- `Content.from_text(...)` for plain textual output, +- `Content.from_data(...)` or `Content.from_uri(...)` for generated files or binary artifacts, +- `Content.from_error(...)` for execution failures, +- and `Content.from_function_result(..., result=list[Content])` when surfacing the final result of `execute_code` through the normal tool result path. + +#### `execute_code` input contract + +```json +{ + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Code to execute using the provider's configured backend/runtime behavior." + } + }, + "required": ["code"] +} +``` + +Execution failures should surface readable error text and structured error `Content`, not a custom backend result object. + +Timeouts, out-of-memory conditions, backend crashes, and similar sandbox failures are all `execute_code` failures and should surface as structured error content. Partial textual or file outputs may be returned only when the backend can report them unambiguously; callers should not rely on partial-output recovery as a portable contract. 
+ +## E2E Code Samples + +### Tool-enabled CodeAct mode + +```python +codeact = HyperlightCodeActProvider( + tools=[fetch_docs, query_data], + workspace_root="./workdir", + allowed_domains=[("api.github.com", "GET")], +) +codeact.add_tools([lookup_user]) + +agent = Agent( + client=client, + name="assistant", + tools=[send_email], # direct-only tool + context_providers=[codeact], +) +``` + +### Standard code interpreter mode + +```python +code_interpreter = HyperlightCodeActProvider( + workspace_root="./data", +) + +agent = Agent( + client=client, + name="interpreter", + context_providers=[code_interpreter], +) +``` diff --git a/python/.cspell.json b/python/.cspell.json index a26cc7fed7..b72fa96cf5 100644 --- a/python/.cspell.json +++ b/python/.cspell.json @@ -30,6 +30,7 @@ "azuredocs", "azurefunctions", "boto", + "codeact", "contentvector", "contoso", "datamodel", @@ -45,6 +46,7 @@ "hnsw", "httpx", "huggingface", + "hyperlight", "Instrumentor", "logit", "logprobs", diff --git a/python/PACKAGE_STATUS.md b/python/PACKAGE_STATUS.md index 7a726812ff..7681ae1d0d 100644 --- a/python/PACKAGE_STATUS.md +++ b/python/PACKAGE_STATUS.md @@ -32,6 +32,7 @@ Status is grouped into these buckets: | `agent-framework-foundry` | `python/packages/foundry` | `released` | | `agent-framework-foundry-local` | `python/packages/foundry_local` | `beta` | | `agent-framework-github-copilot` | `python/packages/github_copilot` | `beta` | +| `agent-framework-hyperlight` | `python/packages/hyperlight` | `alpha` | | `agent-framework-lab` | `python/packages/lab` | `beta` | | `agent-framework-mem0` | `python/packages/mem0` | `beta` | | `agent-framework-ollama` | `python/packages/ollama` | `beta` | diff --git a/python/packages/core/agent_framework/_agents.py b/python/packages/core/agent_framework/_agents.py index 585898ae52..464d9bc21f 100644 --- a/python/packages/core/agent_framework/_agents.py +++ b/python/packages/core/agent_framework/_agents.py @@ -1191,11 +1191,14 @@ async def 
_prepare_run_context( options=opts, service_stores_history=bool(store_), ) + provider_options = dict(opts) + if tools_ is not None: + provider_options["tools"] = tools_ session_context, chat_options = await self._prepare_session_and_messages( session=active_session, input_messages=input_messages, - options=opts, + options=provider_options, ) default_additional_args = chat_options.pop("additional_function_arguments", None) if isinstance(default_additional_args, Mapping): @@ -1209,7 +1212,7 @@ async def _prepare_run_context( mcp_duplicate_message = "Tool names must be unique. Consider setting `tool_name_prefix` on the MCPTool." # Normalize tools - normalized_tools = normalize_tools(tools_) + normalized_tools = normalize_tools(session_context.options.get("tools", tools_)) # Resolve final tool list (configured tools + runtime provided tools + local MCP server tools) final_tools = list(base_tools) diff --git a/python/packages/core/agent_framework/_sessions.py b/python/packages/core/agent_framework/_sessions.py index 55d1a10a18..7754475e63 100644 --- a/python/packages/core/agent_framework/_sessions.py +++ b/python/packages/core/agent_framework/_sessions.py @@ -149,7 +149,8 @@ class SessionContext: middleware: Dict mapping source_id -> chat/function middleware added by that provider. Maintains insertion order (provider execution order). response: After invocation, contains the full AgentResponse, should not be changed. - options: Options passed to agent.run() - read-only, for reflection only. + options: Options passed to agent.run(). Providers can inspect these and may + update ``options["tools"]`` to influence per-run tool resolution. metadata: Shared metadata dictionary for cross-provider communication. """ @@ -176,7 +177,8 @@ def __init__( instructions: Pre-populated instructions. tools: Pre-populated tools. middleware: Pre-populated chat/function middleware by source. - options: Options from agent.run() - read-only for providers. + options: Options from agent.run(). 
Providers may inspect these and can + update ``options["tools"]`` to influence per-run tool resolution. metadata: Shared metadata for cross-provider communication. """ self.session_id = session_id diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 6cdc74b313..3d119413d0 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -89,6 +89,7 @@ DEFAULT_MAX_ITERATIONS: Final[int] = 40 DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST: Final[int] = 3 SHELL_TOOL_KIND_VALUE: Final[str] = "shell" +ApprovalMode: TypeAlias = Literal["always_require", "never_require"] ChatClientT = TypeVar("ChatClientT", bound="SupportsChatGetResponse[Any]") ResponseModelBoundT = TypeVar("ResponseModelBoundT", bound=BaseModel) @@ -270,7 +271,7 @@ def __init__( *, name: str, description: str = "", - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, @@ -1030,7 +1031,7 @@ def tool( name: str | None = None, description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, @@ -1046,7 +1047,7 @@ def tool( name: str | None = None, description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, @@ -1061,7 +1062,7 @@ def tool( name: str | None = None, description: str | None = None, schema: type[BaseModel] | Mapping[str, 
Any] | None = None, - approval_mode: Literal["always_require", "never_require"] | None = None, + approval_mode: ApprovalMode | None = None, kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, diff --git a/python/packages/core/tests/core/test_agents.py b/python/packages/core/tests/core/test_agents.py index c7b3d7860c..41cc6e4663 100644 --- a/python/packages/core/tests/core/test_agents.py +++ b/python/packages/core/tests/core/test_agents.py @@ -841,6 +841,67 @@ async def test_per_service_call_persistence_rejects_existing_conversation_id_whe await agent.run("Hello", session=session, options={"store": False, "conversation_id": "existing_conversation"}) +async def test_context_provider_can_inspect_runtime_tools_from_run( + chat_client_base: SupportsChatGetResponse, +) -> None: + seen_tools: list[Any] = [] + + class RuntimeToolsProvider(ContextProvider): + def __init__(self) -> None: + super().__init__(source_id="runtime-tools") + + async def before_run(self, *, agent: Any, session: Any, context: Any, state: Any) -> None: + del agent, session, state + tools = context.options.get("tools", []) + seen_tools.extend(list(tools) if isinstance(tools, list) else [tools]) + + runtime_tool = FunctionTool(func=lambda: "runtime", name="runtime_tool", description="Runtime tool") + agent = Agent(client=chat_client_base, context_providers=[RuntimeToolsProvider()]) + + await agent._prepare_run_context( # type: ignore[reportPrivateUsage] + messages="Hello", + session=agent.create_session(), + tools=[runtime_tool], + options=None, + compaction_strategy=None, + tokenizer=None, + function_invocation_kwargs=None, + client_kwargs=None, + ) + + assert seen_tools == [runtime_tool] + + +async def test_context_provider_can_remove_runtime_tools_from_run( + chat_client_base: SupportsChatGetResponse, +) -> None: + class RuntimeToolsProvider(ContextProvider): + def __init__(self) -> None: + super().__init__(source_id="runtime-tools") + + async 
def before_run(self, *, agent: Any, session: Any, context: Any, state: Any) -> None: + del agent, session, state + context.options["tools"] = [] + + base_tool = FunctionTool(func=lambda: "base", name="base_tool", description="Base tool") + runtime_tool = FunctionTool(func=lambda: "runtime", name="runtime_tool", description="Runtime tool") + agent = Agent(client=chat_client_base, tools=[base_tool], context_providers=[RuntimeToolsProvider()]) + + ctx = await agent._prepare_run_context( # type: ignore[reportPrivateUsage] + messages="Hello", + session=agent.create_session(), + tools=[runtime_tool], + options=None, + compaction_strategy=None, + tokenizer=None, + function_invocation_kwargs=None, + client_kwargs=None, + ) + + tool_names = [_get_tool_name(tool_obj) for tool_obj in ctx["chat_options"]["tools"]] + assert tool_names == ["base_tool"] + + async def test_chat_client_agent_run_with_session(chat_client_base: SupportsChatGetResponse) -> None: mock_response = ChatResponse( messages=[Message(role="assistant", contents=[Content.from_text("test response")])], diff --git a/python/packages/hyperlight/LICENSE b/python/packages/hyperlight/LICENSE new file mode 100644 index 0000000000..9e841e7a26 --- /dev/null +++ b/python/packages/hyperlight/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/python/packages/hyperlight/README.md b/python/packages/hyperlight/README.md new file mode 100644 index 0000000000..cfe2a02022 --- /dev/null +++ b/python/packages/hyperlight/README.md @@ -0,0 +1,36 @@ +# agent-framework-hyperlight + +Alpha Hyperlight-backed CodeAct integrations for Microsoft Agent Framework. + +## Installation + +```bash +pip install agent-framework-hyperlight --pre +``` + +This package depends on `hyperlight-sandbox`, the packaged Python guest, and the +Wasm backend package on supported platforms. If the backend is not published for +your current platform yet, `execute_code` will fail at runtime when it tries to +create the sandbox. + +## Public API + +- `AllowedDomain` +- `AllowedDomainInput` +- `HyperlightCodeActProvider` +- `HyperlightExecuteCodeTool` +- `FileMount` +- `FileMountInput` + +## Notes + +- This package is intentionally separate from `agent-framework-core` so CodeAct + usage and installation remain optional. +- Alpha-package samples live under `packages/hyperlight/samples/`. +- `file_mounts` accepts a single string shorthand, an explicit `(host_path, + mount_path)` pair, or a `FileMount` named tuple. The host-side path in the + explicit forms may be a `str` or `Path`. Use the explicit two-value form when + the host path differs from the sandbox path. 
+- `allowed_domains` accepts a single string target such as `"github.com"` to + allow all backend-supported methods, an explicit `(target, method_or_methods)` + tuple such as `("github.com", "GET")`, or an `AllowedDomain` named tuple. diff --git a/python/packages/hyperlight/agent_framework_hyperlight/__init__.py b/python/packages/hyperlight/agent_framework_hyperlight/__init__.py new file mode 100644 index 0000000000..511252d0df --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import importlib.metadata + +from ._execute_code_tool import HyperlightExecuteCodeTool +from ._provider import HyperlightCodeActProvider +from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountInput + +try: + __version__ = importlib.metadata.version(__name__) +except importlib.metadata.PackageNotFoundError: + __version__ = "0.0.0" + +__all__ = [ + "AllowedDomain", + "AllowedDomainInput", + "FileMount", + "FileMountInput", + "HyperlightCodeActProvider", + "HyperlightExecuteCodeTool", + "__version__", +] diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_execute_code_tool.py b/python/packages/hyperlight/agent_framework_hyperlight/_execute_code_tool.py new file mode 100644 index 0000000000..875faa3413 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_execute_code_tool.py @@ -0,0 +1,721 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from __future__ import annotations + +import ast +import copy +import mimetypes +import shutil +import threading +from collections.abc import Callable, Sequence +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +from tempfile import TemporaryDirectory +from typing import Annotated, Any, Protocol, TypeGuard, cast +from urllib.parse import urlparse + +from agent_framework import Content, FunctionTool +from agent_framework._tools import ApprovalMode, normalize_tools +from pydantic import BaseModel, Field + +from ._instructions import build_codeact_instructions, build_execute_code_description +from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountHostPath, FileMountInput + +DEFAULT_HYPERLIGHT_BACKEND = "wasm" +DEFAULT_HYPERLIGHT_MODULE = "python_guest.path" +EXECUTE_CODE_INPUT_DESCRIPTION = "Python code to execute in an isolated Hyperlight sandbox." + + +class _ExecuteCodeInput(BaseModel): + code: Annotated[str, Field(description=EXECUTE_CODE_INPUT_DESCRIPTION)] + + +@dataclass(frozen=True, slots=True) +class _StoredFileMount: + host_path: Path + mount_path: str + + +@dataclass(frozen=True, slots=True) +class _NormalizedFileMount: + host_path: Path + mount_path: str + path_signature: tuple[tuple[str, int, int], ...] + + +@dataclass(frozen=True, slots=True) +class _RunConfig: + backend: str + module: str | None + module_path: str | None + approval_mode: ApprovalMode + tools: tuple[FunctionTool, ...] + workspace_root: Path | None + workspace_signature: tuple[tuple[str, int, int], ...] + file_mounts: tuple[_NormalizedFileMount, ...] + allowed_domains: tuple[AllowedDomain, ...] 
+ + @property + def mounted_paths(self) -> tuple[str, ...]: + return tuple(_display_mount_path(mount.mount_path) for mount in self.file_mounts) + + @property + def filesystem_enabled(self) -> bool: + return self.workspace_root is not None or bool(self.file_mounts) + + def cache_key(self) -> tuple[Any, ...]: + return ( + self.backend, + self.module, + self.module_path, + self.approval_mode, + tuple((tool_obj.name, id(tool_obj)) for tool_obj in self.tools), + str(self.workspace_root) if self.workspace_root is not None else None, + self.workspace_signature, + tuple((mount.mount_path, str(mount.host_path), mount.path_signature) for mount in self.file_mounts), + tuple((allowed_domain.target, allowed_domain.methods) for allowed_domain in self.allowed_domains), + ) + + +class SandboxRuntime(Protocol): + def execute(self, *, config: _RunConfig, code: str) -> list[Content]: ... + + +@dataclass +class _SandboxEntry: + sandbox: Any + snapshot: Any + input_dir: TemporaryDirectory[str] | None + output_dir: TemporaryDirectory[str] | None + lock: threading.RLock + + +def _load_sandbox_class() -> type[Any]: + try: + from hyperlight_sandbox import Sandbox + except ModuleNotFoundError as exc: + raise ModuleNotFoundError( + "Hyperlight support requires `hyperlight-sandbox`, `hyperlight-sandbox-python-guest`, " + "and a compatible backend package such as `hyperlight-sandbox-backend-wasm`." 
+ ) from exc + + return Sandbox + + +def _passthrough_result_parser(result: Any) -> str: + return repr(result) + + +def _collect_tools(*tool_groups: Any) -> list[FunctionTool]: + tools_by_name: dict[str, FunctionTool] = {} + + for tool_group in tool_groups: + normalized_group = normalize_tools(tool_group) + for tool_obj in normalized_group: + if not isinstance(tool_obj, FunctionTool): + continue + if tool_obj.name == "execute_code": + continue + tools_by_name.pop(tool_obj.name, None) + tools_by_name[tool_obj.name] = tool_obj + + return list(tools_by_name.values()) + + +def _resolve_execute_code_approval_mode( + *, + base_approval_mode: ApprovalMode, + tools: Sequence[FunctionTool], +) -> ApprovalMode: + if base_approval_mode == "always_require": + return "always_require" + + if any(tool_obj.approval_mode == "always_require" for tool_obj in tools): + return "always_require" + + return "never_require" + + +def _resolve_existing_path(value: str | Path) -> Path: + return Path(value).expanduser().resolve(strict=True) + + +def _resolve_workspace_root(value: str | Path | None) -> Path | None: + if value is None: + return None + + resolved_path = _resolve_existing_path(value) + if not resolved_path.is_dir(): + raise ValueError("workspace_root must point to an existing directory.") + return resolved_path + + +def _is_file_mount_pair(value: Any) -> TypeGuard[FileMount | tuple[FileMountHostPath, str]]: + if not isinstance(value, tuple): + return False + + value_tuple = cast(tuple[object, ...], value) + if len(value_tuple) != 2: + return False + + host_path, mount_path = value_tuple + return isinstance(host_path, (str, Path)) and isinstance(mount_path, str) + + +def _normalize_file_mount_input(file_mount: FileMountInput) -> _StoredFileMount: + host_path: FileMountHostPath + mount_path: str + if isinstance(file_mount, str): + host_path = file_mount + mount_path = file_mount + else: + host_path = file_mount[0] + mount_path = file_mount[1] + + return _StoredFileMount( + 
host_path=_resolve_existing_path(host_path), + mount_path=_normalize_mount_path(mount_path), + ) + + +def _normalize_domain(target: str) -> str: + candidate = target.strip() + if not candidate: + raise ValueError("Allowed domain entries must not be empty.") + + parsed = urlparse(candidate if "://" in candidate else f"//{candidate}") + normalized = (parsed.netloc or parsed.path).strip().rstrip("/") + if not normalized: + raise ValueError(f"Could not normalize allowed domain entry: {target!r}.") + return normalized.lower() + + +def _normalize_http_method(method: str) -> str: + normalized = method.strip().upper() + if not normalized: + raise ValueError("HTTP method entries must not be empty.") + return normalized + + +def _normalize_http_methods(methods: str | Sequence[str] | None) -> tuple[str, ...] | None: + if methods is None: + return None + + normalized_methods = ( + {_normalize_http_method(methods)} + if isinstance(methods, str) + else {_normalize_http_method(method) for method in methods} + ) + if not normalized_methods: + raise ValueError("Allowed domain methods must not be empty when provided.") + return tuple(sorted(normalized_methods)) + + +def _is_allowed_domain_pair(value: Any) -> TypeGuard[tuple[str, str | Sequence[str]]]: + if not isinstance(value, tuple) or isinstance(value, AllowedDomain): + return False + + value_tuple = cast(tuple[object, ...], value) + if len(value_tuple) != 2: + return False + + target, methods = value_tuple + if not isinstance(target, str): + return False + if isinstance(methods, str): + return True + return isinstance(methods, Sequence) + + +def _normalize_allowed_domain_input(allowed_domain: AllowedDomainInput) -> AllowedDomain: + if isinstance(allowed_domain, str): + return AllowedDomain(target=_normalize_domain(allowed_domain), methods=None) + + if isinstance(allowed_domain, AllowedDomain): + return AllowedDomain( + target=_normalize_domain(allowed_domain.target), + methods=_normalize_http_methods(allowed_domain.methods), + ) 
+ + target, methods = allowed_domain + return AllowedDomain( + target=_normalize_domain(target), + methods=_normalize_http_methods(methods), + ) + + +def _normalize_mount_path(mount_path: str) -> str: + raw_path = mount_path.strip().replace("\\", "/") + if not raw_path: + raise ValueError("mount_path must not be empty.") + + pure_path = PurePosixPath(raw_path) + parts = [part for part in pure_path.parts if part not in {"", "/", "."}] + if parts and parts[0] == "input": + parts = parts[1:] + if any(part == ".." for part in parts): + raise ValueError("mount_path must stay within /input.") + if not parts: + raise ValueError("mount_path must point to a concrete path under /input.") + return "/".join(parts) + + +def _display_mount_path(mount_path: str) -> str: + return f"/input/{mount_path}" + + +def _path_tree_signature(path: Path) -> tuple[tuple[str, int, int], ...]: + if path.is_file(): + stat = path.stat() + return ((path.name, int(stat.st_size), int(stat.st_mtime_ns)),) + + entries: list[tuple[str, int, int]] = [] + for candidate in sorted(path.rglob("*"), key=lambda value: value.as_posix()): + try: + stat = candidate.stat() + except FileNotFoundError: + continue + relative_path = candidate.relative_to(path).as_posix() + size = int(stat.st_size) if candidate.is_file() else 0 + entries.append((relative_path, size, int(stat.st_mtime_ns))) + return tuple(entries) + + +def _copy_path(source: Path, destination: Path) -> None: + if source.is_dir(): + destination.mkdir(parents=True, exist_ok=True) + for child in sorted(source.iterdir(), key=lambda value: value.name): + _copy_path(child, destination / child.name) + return + + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, destination) + + +def _populate_input_dir(*, config: _RunConfig, input_root: Path) -> None: + if config.workspace_root is not None: + for child in sorted(config.workspace_root.iterdir(), key=lambda value: value.name): + _copy_path(child, input_root / child.name) + + for mount 
in config.file_mounts: + _copy_path(mount.host_path, input_root / mount.mount_path) + + +def _create_file_content(file_path: Path, *, relative_path: str) -> Content: + media_type = mimetypes.guess_type(file_path.name)[0] or "application/octet-stream" + return Content.from_data( + data=file_path.read_bytes(), + media_type=media_type, + additional_properties={"path": f"/output/{relative_path}"}, + ) + + +def _parse_output_files(*, sandbox: Any, output_dir: TemporaryDirectory[str] | None) -> list[Content]: + if output_dir is None or not hasattr(sandbox, "get_output_files"): + return [] + + try: + output_files = sandbox.get_output_files() + except Exception: + return [] + + contents: list[Content] = [] + root = Path(output_dir.name) + + for output_file in output_files: + raw_path = str(output_file).replace("\\", "/") + pure_path = PurePosixPath(raw_path) + parts = [part for part in pure_path.parts if part not in {"", "/", "."}] + if parts and parts[0] == "output": + parts = parts[1:] + if not parts or any(part == ".." 
for part in parts): + continue + + relative_path = "/".join(parts) + host_path = root.joinpath(*parts) + if host_path.is_file(): + contents.append(_create_file_content(host_path, relative_path=relative_path)) + + return contents + + +def _build_execution_contents( + *, + result: Any, + sandbox: Any, + output_dir: TemporaryDirectory[str] | None, +) -> list[Content]: + success = bool(getattr(result, "success", False)) + stdout = str(getattr(result, "stdout", "") or "").replace("\r\n", "\n") or None + stderr = str(getattr(result, "stderr", "") or "").replace("\r\n", "\n") or None + outputs: list[Content] = [] + + if stdout is not None: + outputs.append(Content.from_text(stdout, raw_representation=result)) + + outputs.extend(_parse_output_files(sandbox=sandbox, output_dir=output_dir)) + + if success: + if stderr is not None: + outputs.append(Content.from_text(stderr, raw_representation=result)) + if not outputs: + outputs.append(Content.from_text("Code executed successfully without output.")) + return [Content.from_code_interpreter_tool_result(outputs=outputs, raw_representation=result)] + + error_details = stderr or "Unknown sandbox error" + outputs.append( + Content.from_error( + message="Execution error", + error_details=error_details, + raw_representation=result, + ) + ) + return [Content.from_code_interpreter_tool_result(outputs=outputs, raw_representation=result)] + + +def _make_sandbox_callback(tool_obj: FunctionTool) -> Callable[..., Any]: + sandbox_tool = copy.copy(tool_obj) + sandbox_tool.result_parser = _passthrough_result_parser + + async def _callback(**kwargs: Any) -> Any: + contents = await sandbox_tool.invoke(arguments=kwargs) + + values: list[Any] = [] + for content in contents: + if content.type == "text" and content.text is not None: + try: + values.append(ast.literal_eval(content.text)) + except (SyntaxError, ValueError): + values.append(content.text) + continue + + values.append(content.to_dict()) + + if len(values) == 1: + return values[0] + 
return values + + return _callback + + +class _SandboxRegistry: + def __init__(self) -> None: + self._entries: dict[tuple[Any, ...], _SandboxEntry] = {} + self._entries_lock = threading.RLock() + + def execute(self, *, config: _RunConfig, code: str) -> list[Content]: + cache_key = config.cache_key() + with self._entries_lock: + entry = self._entries.get(cache_key) + if entry is None: + entry = self._create_entry(config) + self._entries[cache_key] = entry + + with entry.lock: + entry.sandbox.restore(entry.snapshot) + result = entry.sandbox.run(code=code) + return _build_execution_contents(result=result, sandbox=entry.sandbox, output_dir=entry.output_dir) + + def _create_entry(self, config: _RunConfig) -> _SandboxEntry: + input_dir_handle = TemporaryDirectory() if config.filesystem_enabled else None + output_dir_handle = TemporaryDirectory() if config.filesystem_enabled else None + + if input_dir_handle is not None: + _populate_input_dir(config=config, input_root=Path(input_dir_handle.name)) + + sandbox_cls = _load_sandbox_class() + try: + sandbox = sandbox_cls( + backend=config.backend, + module=config.module, + module_path=config.module_path, + input_dir=input_dir_handle.name if input_dir_handle is not None else None, + output_dir=output_dir_handle.name if output_dir_handle is not None else None, + ) + except ImportError as exc: + raise RuntimeError( + "The selected Hyperlight backend is not installed or not supported on this platform. " + "Install a compatible backend package, such as `hyperlight-sandbox-backend-wasm`." 
+ ) from exc + + for tool_obj in config.tools: + sandbox.register_tool(tool_obj.name, _make_sandbox_callback(tool_obj)) + + for allowed_domain in config.allowed_domains: + sandbox.allow_domain( + allowed_domain.target, + methods=list(allowed_domain.methods) if allowed_domain.methods is not None else None, + ) + + sandbox.run("None") + snapshot = sandbox.snapshot() + return _SandboxEntry( + sandbox=sandbox, + snapshot=snapshot, + input_dir=input_dir_handle, + output_dir=output_dir_handle, + lock=threading.RLock(), + ) + + +class HyperlightExecuteCodeTool(FunctionTool): + """Execute Python code inside a Hyperlight sandbox.""" + + def __init__( + self, + *, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None, + approval_mode: ApprovalMode | None = None, + workspace_root: str | Path | None = None, + file_mounts: FileMountInput | Sequence[FileMountInput] | None = None, + allowed_domains: AllowedDomainInput | Sequence[AllowedDomainInput] | None = None, + backend: str = DEFAULT_HYPERLIGHT_BACKEND, + module: str | None = DEFAULT_HYPERLIGHT_MODULE, + module_path: str | None = None, + _registry: SandboxRuntime | None = None, + ) -> None: + super().__init__( + name="execute_code", + description=EXECUTE_CODE_INPUT_DESCRIPTION, + approval_mode="never_require", + func=self._run_code, + input_model=_ExecuteCodeInput, + ) + self._state_lock = threading.RLock() + self._registry = _registry or _SandboxRegistry() + self._default_approval_mode: ApprovalMode = approval_mode or "never_require" + self._workspace_root = _resolve_workspace_root(workspace_root) + self._backend: str = backend + self._module: str | None = module + self._module_path: str | None = module_path + self._managed_tools: list[FunctionTool] = [] + self._file_mounts: dict[str, _StoredFileMount] = {} + self._allowed_domains: dict[str, AllowedDomain] = {} + + if tools is not None: + self.add_tools(tools) + if file_mounts is not None: + self.add_file_mounts(file_mounts) + 
if allowed_domains is not None: + self.add_allowed_domains(allowed_domains) + + self._refresh_approval_mode() + + @property + def description(self) -> str: + state_lock = getattr(self, "_state_lock", None) + if state_lock is None: + return str(self.__dict__.get("description", EXECUTE_CODE_INPUT_DESCRIPTION)) + + with state_lock: + allowed_domains = sorted(self._allowed_domains.values(), key=lambda value: value.target) + return build_execute_code_description( + tools=self._managed_tools, + filesystem_enabled=self._workspace_root is not None or bool(self._file_mounts), + workspace_enabled=self._workspace_root is not None, + mounted_paths=[_display_mount_path(mount.mount_path) for mount in self._file_mounts.values()], + allowed_domains=allowed_domains, + ) + + @description.setter + def description(self, value: str) -> None: + self.__dict__["description"] = value + + def add_tools( + self, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]], + ) -> None: + """Add sandbox-managed tools to this execute_code surface.""" + with self._state_lock: + combined_tools = _collect_tools(self._managed_tools, tools) + self._managed_tools = combined_tools + self._refresh_approval_mode() + + def get_tools(self) -> list[FunctionTool]: + """Return the currently managed sandbox tools.""" + with self._state_lock: + return list(self._managed_tools) + + def remove_tool(self, name: str) -> None: + """Remove one managed sandbox tool by name.""" + with self._state_lock: + remaining_tools = [tool_obj for tool_obj in self._managed_tools if tool_obj.name != name] + if len(remaining_tools) == len(self._managed_tools): + raise KeyError(f"No managed tool named {name!r} is registered.") + self._managed_tools = remaining_tools + self._refresh_approval_mode() + + def clear_tools(self) -> None: + """Remove all managed sandbox tools.""" + with self._state_lock: + self._managed_tools = [] + self._refresh_approval_mode() + + def add_file_mounts(self, file_mounts: 
FileMountInput | Sequence[FileMountInput]) -> None: + """Add one or more file mounts under `/input`. + + A single string uses the same relative path on the host and in the sandbox. + Use a two-string tuple or `FileMount` when those paths differ. + """ + if isinstance(file_mounts, str) or _is_file_mount_pair(file_mounts): + normalized_mounts = [_normalize_file_mount_input(file_mounts)] + else: + normalized_mounts = [ + _normalize_file_mount_input(mount) for mount in cast(Sequence[FileMountInput], file_mounts) + ] + + with self._state_lock: + for mount in normalized_mounts: + self._file_mounts[mount.mount_path] = mount + + def get_file_mounts(self) -> list[FileMount]: + """Return the configured file mounts.""" + with self._state_lock: + return [ + FileMount(host_path=mount.host_path, mount_path=_display_mount_path(mount.mount_path)) + for mount in self._file_mounts.values() + ] + + def remove_file_mount(self, mount_path: str) -> None: + """Remove one file mount by its sandbox path.""" + normalized_mount_path = _normalize_mount_path(mount_path) + with self._state_lock: + if normalized_mount_path not in self._file_mounts: + raise KeyError(f"No file mount exists for {mount_path!r}.") + del self._file_mounts[normalized_mount_path] + + def clear_file_mounts(self) -> None: + """Remove all configured file mounts.""" + with self._state_lock: + self._file_mounts.clear() + + def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None: + """Add one or more outbound allow-list entries.""" + if isinstance(domains, (str, AllowedDomain)) or _is_allowed_domain_pair(domains): + normalized_domains = [_normalize_allowed_domain_input(domains)] + else: + normalized_domains = [ + _normalize_allowed_domain_input(domain) for domain in cast(Sequence[AllowedDomainInput], domains) + ] + + with self._state_lock: + for normalized_domain in normalized_domains: + self._allowed_domains[normalized_domain.target] = normalized_domain + + def 
get_allowed_domains(self) -> list[AllowedDomain]: + """Return the configured outbound allow-list entries.""" + with self._state_lock: + return sorted(self._allowed_domains.values(), key=lambda value: value.target) + + def remove_allowed_domain(self, domain: str) -> None: + """Remove one outbound allow-list entry.""" + normalized_domain = _normalize_domain(domain) + with self._state_lock: + if normalized_domain not in self._allowed_domains: + raise KeyError(f"No allowed domain exists for {domain!r}.") + del self._allowed_domains[normalized_domain] + + def clear_allowed_domains(self) -> None: + """Remove all outbound allow-list entries.""" + with self._state_lock: + self._allowed_domains.clear() + + def build_instructions(self, *, tools_visible_to_model: bool) -> str: + """Build the current CodeAct instructions for this execute_code surface.""" + config = self._build_run_config() + return build_codeact_instructions( + tools=config.tools, + tools_visible_to_model=tools_visible_to_model, + ) + + def create_run_tool(self) -> HyperlightExecuteCodeTool: + """Create a run-scoped snapshot of this execute_code surface.""" + file_mounts = self.get_file_mounts() + allowed_domains = self.get_allowed_domains() + + return HyperlightExecuteCodeTool( + tools=self.get_tools(), + approval_mode=self._default_approval_mode, + workspace_root=self._workspace_root, + file_mounts=file_mounts or None, + allowed_domains=allowed_domains or None, + backend=self._backend, + module=self._module, + module_path=self._module_path, + _registry=self._registry, + ) + + def build_serializable_state(self) -> dict[str, Any]: + """Return a JSON-serializable snapshot of the effective run state.""" + config = self._build_run_config() + return { + "backend": config.backend, + "module": config.module, + "module_path": config.module_path, + "approval_mode": config.approval_mode, + "tool_names": [tool_obj.name for tool_obj in config.tools], + "filesystem_enabled": config.filesystem_enabled, + "workspace_root": 
str(config.workspace_root) if config.workspace_root is not None else None, + "file_mounts": [ + { + "host_path": str(mount.host_path), + "mount_path": _display_mount_path(mount.mount_path), + } + for mount in config.file_mounts + ], + "network_enabled": bool(config.allowed_domains), + "allowed_domains": [ + { + "target": allowed_domain.target, + "methods": list(allowed_domain.methods) if allowed_domain.methods is not None else None, + } + for allowed_domain in config.allowed_domains + ], + } + + def to_dict(self, *, exclude: set[str] | None = None, exclude_none: bool = True) -> dict[str, Any]: + self.__dict__["description"] = self.description + return super().to_dict(exclude=exclude, exclude_none=exclude_none) + + def _refresh_approval_mode(self) -> None: + self.approval_mode = _resolve_execute_code_approval_mode( + base_approval_mode=self._default_approval_mode, + tools=self._managed_tools, + ) + + def _build_run_config(self) -> _RunConfig: + with self._state_lock: + managed_tools = tuple(self._managed_tools) + workspace_root = self._workspace_root + stored_mounts = tuple(self._file_mounts.values()) + allowed_domains = tuple(sorted(self._allowed_domains.values(), key=lambda value: value.target)) + approval_mode = _resolve_execute_code_approval_mode( + base_approval_mode=self._default_approval_mode, + tools=managed_tools, + ) + + workspace_signature = _path_tree_signature(workspace_root) if workspace_root is not None else () + normalized_mounts = tuple( + _NormalizedFileMount( + host_path=mount.host_path, + mount_path=mount.mount_path, + path_signature=_path_tree_signature(mount.host_path), + ) + for mount in stored_mounts + ) + + return _RunConfig( + backend=self._backend, + module=self._module, + module_path=self._module_path, + approval_mode=approval_mode, + tools=managed_tools, + workspace_root=workspace_root, + workspace_signature=workspace_signature, + file_mounts=normalized_mounts, + allowed_domains=allowed_domains, + ) + + def _run_code(self, *, code: str) 
-> list[Content]: + config = self._build_run_config() + return self._registry.execute(config=config, code=code) diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_instructions.py b/python/packages/hyperlight/agent_framework_hyperlight/_instructions.py new file mode 100644 index 0000000000..f866c1349c --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_instructions.py @@ -0,0 +1,126 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +from collections.abc import Sequence + +from agent_framework import FunctionTool + +from ._types import AllowedDomain + + +def _format_tool_summaries(tools: Sequence[FunctionTool]) -> str: + if not tools: + return "- No tools are currently registered inside the sandbox." + + lines: list[str] = [] + for tool_obj in tools: + parameters = tool_obj.parameters().get("properties", {}) + parameter_names = [name for name in parameters if isinstance(name, str)] + parameter_summary = ", ".join(parameter_names) if parameter_names else "none" + description = str(tool_obj.description or "").strip() or "No description provided." + lines.append(f"- `{tool_obj.name}`: {description} Parameters: {parameter_summary}.") + return "\n".join(lines) + + +def _format_filesystem_capabilities( + *, + filesystem_enabled: bool, + workspace_enabled: bool, + mounted_paths: Sequence[str], +) -> str: + if not filesystem_enabled: + return "Filesystem access is unavailable because no workspace root or file mounts are configured." 
+ + lines = ["Filesystem access is enabled."] + lines.append("Read files from `/input`.") + lines.append("Write generated artifacts to `/output`; returned files will be attached to the tool result.") + + if workspace_enabled: + lines.append("The configured workspace root is available under `/input/`.") + + if mounted_paths: + lines.append("Additional mounted paths:") + lines.extend(f"- `{mounted_path}`" for mounted_path in mounted_paths) + elif not workspace_enabled: + lines.append("No workspace root or explicit file mounts are currently configured.") + + return "\n".join(lines) + + +def _format_network_capabilities( + *, + allowed_domains: Sequence[AllowedDomain], +) -> str: + if not allowed_domains: + return "Outbound network access is unavailable because no allow-listed targets are configured." + + lines = ["Outbound network access is allowed only for these configured targets:"] + for allowed_domain in allowed_domains: + methods_text = ( + ", ".join(allowed_domain.methods) if allowed_domain.methods else "all methods allowed by the backend" + ) + lines.append(f"- `{allowed_domain.target}`: {methods_text}.") + return "\n".join(lines) + + +def build_codeact_instructions( + *, + tools: Sequence[FunctionTool], + tools_visible_to_model: bool, +) -> str: + """Build dynamic CodeAct instructions for the effective sandbox state.""" + usage_note = ( + "Some tools may also appear directly, but prefer `execute_code` whenever you need to combine Python " + "control flow with sandbox tool calls." + if tools_visible_to_model + else "Provider-owned sandbox tools are not exposed separately; use `execute_code` when you need them." + ) + + return f"""You have one primary tool: execute_code. + +Prefer one execute_code call per request when possible. +Its tool description contains the current `call_tool(...)` guidance, sandbox +tool registry, and capability limits. 
+ +{usage_note} +""" + + +def build_execute_code_description( + *, + tools: Sequence[FunctionTool], + filesystem_enabled: bool, + workspace_enabled: bool, + mounted_paths: Sequence[str], + allowed_domains: Sequence[AllowedDomain], +) -> str: + """Build the dynamic execute_code tool description for standalone usage.""" + filesystem_text = _format_filesystem_capabilities( + filesystem_enabled=filesystem_enabled, + workspace_enabled=workspace_enabled, + mounted_paths=mounted_paths, + ) + network_text = _format_network_capabilities( + allowed_domains=allowed_domains, + ) + + return f"""Execute Python in an isolated Hyperlight sandbox. + +Inside the sandbox, `call_tool(name, **kwargs)` is available as a built-in for +registered host callbacks. Use the tool name as the first argument and keyword +arguments only. Do not pass a dict or any other positional arguments after the +tool name. + +Registered sandbox tools: +{_format_tool_summaries(tools)} + +Filesystem capabilities: +{filesystem_text} + +Network capabilities: +{network_text} + +Prefer `execute_code` when you need to combine one or more `call_tool(...)` +calls with Python control flow, loops, or post-processing. +""" diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_provider.py b/python/packages/hyperlight/agent_framework_hyperlight/_provider.py new file mode 100644 index 0000000000..1232ecc262 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_provider.py @@ -0,0 +1,111 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from __future__ import annotations + +from collections.abc import Callable, Sequence +from pathlib import Path +from typing import Any + +from agent_framework import AgentSession, ContextProvider, FunctionTool, SessionContext +from agent_framework._tools import ApprovalMode + +from ._execute_code_tool import HyperlightExecuteCodeTool, SandboxRuntime +from ._types import AllowedDomain, AllowedDomainInput, FileMount, FileMountInput + + +class HyperlightCodeActProvider(ContextProvider): + """Inject a Hyperlight-backed CodeAct surface using provider-owned tools.""" + + DEFAULT_SOURCE_ID = "hyperlight_codeact" + + def __init__( + self, + source_id: str = DEFAULT_SOURCE_ID, + *, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]] | None = None, + approval_mode: ApprovalMode | None = None, + workspace_root: str | Path | None = None, + file_mounts: FileMountInput | Sequence[FileMountInput] | None = None, + allowed_domains: AllowedDomainInput | Sequence[AllowedDomainInput] | None = None, + backend: str = "wasm", + module: str | None = "python_guest.path", + module_path: str | None = None, + _registry: SandboxRuntime | None = None, + ) -> None: + super().__init__(source_id) + self._execute_code_tool = HyperlightExecuteCodeTool( + tools=tools, + approval_mode=approval_mode, + workspace_root=workspace_root, + file_mounts=file_mounts, + allowed_domains=allowed_domains, + backend=backend, + module=module, + module_path=module_path, + _registry=_registry, + ) + + def add_tools( + self, + tools: FunctionTool | Callable[..., Any] | Sequence[FunctionTool | Callable[..., Any]], + ) -> None: + """Add provider-owned sandbox tools.""" + self._execute_code_tool.add_tools(tools) + + def get_tools(self) -> list[FunctionTool]: + """Return the provider-owned sandbox tools.""" + return self._execute_code_tool.get_tools() + + def remove_tool(self, name: str) -> None: + """Remove one provider-owned sandbox tool by name.""" + 
self._execute_code_tool.remove_tool(name) + + def clear_tools(self) -> None: + """Remove all provider-owned sandbox tools.""" + self._execute_code_tool.clear_tools() + + def add_file_mounts(self, file_mounts: FileMountInput | Sequence[FileMountInput]) -> None: + """Add provider-managed file mounts.""" + self._execute_code_tool.add_file_mounts(file_mounts) + + def get_file_mounts(self) -> list[FileMount]: + """Return the provider-managed file mounts.""" + return self._execute_code_tool.get_file_mounts() + + def remove_file_mount(self, mount_path: str) -> None: + """Remove one provider-managed file mount.""" + self._execute_code_tool.remove_file_mount(mount_path) + + def clear_file_mounts(self) -> None: + """Remove all provider-managed file mounts.""" + self._execute_code_tool.clear_file_mounts() + + def add_allowed_domains(self, domains: AllowedDomainInput | Sequence[AllowedDomainInput]) -> None: + """Add provider-managed outbound allow-list entries.""" + self._execute_code_tool.add_allowed_domains(domains) + + def get_allowed_domains(self) -> list[AllowedDomain]: + """Return the provider-managed outbound allow-list entries.""" + return self._execute_code_tool.get_allowed_domains() + + def remove_allowed_domain(self, domain: str) -> None: + """Remove one provider-managed outbound allow-list entry.""" + self._execute_code_tool.remove_allowed_domain(domain) + + def clear_allowed_domains(self) -> None: + """Remove all provider-managed outbound allow-list entries.""" + self._execute_code_tool.clear_allowed_domains() + + async def before_run( + self, + *, + agent: Any, + session: AgentSession | None, + context: SessionContext, + state: dict[str, Any], + ) -> None: + """Inject CodeAct instructions and a run-scoped execute_code tool before each run.""" + run_tool = self._execute_code_tool.create_run_tool() + state[self.source_id] = run_tool.build_serializable_state() + context.extend_instructions(self.source_id, run_tool.build_instructions(tools_visible_to_model=False)) + 
context.extend_tools(self.source_id, [run_tool]) diff --git a/python/packages/hyperlight/agent_framework_hyperlight/_types.py b/python/packages/hyperlight/agent_framework_hyperlight/_types.py new file mode 100644 index 0000000000..8d202c8986 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/_types.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +from collections.abc import Sequence +from pathlib import Path +from typing import NamedTuple, TypeAlias + + +class FileMount(NamedTuple): + """Map a host file or directory into the sandbox input tree.""" + + host_path: str | Path + mount_path: str + + +FileMountHostPath: TypeAlias = str | Path +FileMountInput: TypeAlias = str | tuple[FileMountHostPath, str] | FileMount + + +class AllowedDomain(NamedTuple): + """Allow outbound requests to one target, optionally restricted to specific HTTP methods.""" + + target: str + methods: tuple[str, ...] | None = None + + +AllowedDomainInput: TypeAlias = str | tuple[str, str | Sequence[str]] | AllowedDomain diff --git a/python/packages/hyperlight/agent_framework_hyperlight/py.typed b/python/packages/hyperlight/agent_framework_hyperlight/py.typed new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/python/packages/hyperlight/agent_framework_hyperlight/py.typed @@ -0,0 +1 @@ + diff --git a/python/packages/hyperlight/pyproject.toml b/python/packages/hyperlight/pyproject.toml new file mode 100644 index 0000000000..9884152043 --- /dev/null +++ b/python/packages/hyperlight/pyproject.toml @@ -0,0 +1,101 @@ +[project] +name = "agent-framework-hyperlight" +description = "Hyperlight CodeAct integrations for Microsoft Agent Framework." 
+authors = [{ name = "Microsoft", email = "af-support@microsoft.com"}] +readme = "README.md" +requires-python = ">=3.10" +version = "1.0.0a260409" +license-files = ["LICENSE"] +urls.homepage = "https://aka.ms/agent-framework" +urls.source = "https://github.com/microsoft/agent-framework/tree/main/python" +urls.release_notes = "https://github.com/microsoft/agent-framework/releases?q=tag%3Apython-1&expanded=true" +urls.issues = "https://github.com/microsoft/agent-framework/issues" +classifiers = [ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Typing :: Typed", +] +dependencies = [ + "agent-framework-core>=1.0.0,<2", + "hyperlight-sandbox>=0.3.0,<0.4", + "hyperlight-sandbox-backend-wasm>=0.3.0,<0.4 ; (sys_platform == 'linux' or sys_platform == 'win32') and python_version < '3.14'", + "hyperlight-sandbox-python-guest>=0.3.0,<0.4", +] + +[tool.uv] +prerelease = "if-necessary-or-explicit" +environments = [ + "sys_platform == 'linux'", + "sys_platform == 'win32'" +] + +[tool.uv-dynamic-versioning] +fallback-version = "0.0.0" + +[tool.pytest.ini_options] +testpaths = 'tests' +addopts = "-ra -q -r fEX" +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +filterwarnings = [] +timeout = 120 +markers = [ + "integration: marks tests as integration tests that require external services", +] + +[tool.ruff] +extend = "../../pyproject.toml" + +[tool.ruff.lint.per-file-ignores] +"samples/**" = ["INP", "T201"] +"tests/**" = ["D", "INP", "TD", "ERA001", "RUF", "S"] + +[tool.coverage.run] +omit = [ + "**/__init__.py" +] + +[tool.pyright] +extends = "../../pyproject.toml" +include = ["agent_framework_hyperlight"] +exclude = ['tests'] + +[tool.mypy] +plugins = ['pydantic.mypy'] 
+strict = true +python_version = "3.10" +ignore_missing_imports = true +disallow_untyped_defs = true +no_implicit_optional = true +check_untyped_defs = true +warn_return_any = true +show_error_codes = true +warn_unused_ignores = false +disallow_incomplete_defs = true +disallow_untyped_decorators = true + +[tool.bandit] +targets = ["agent_framework_hyperlight"] +exclude_dirs = ["tests", "samples"] + +[tool.poe] +executor.type = "uv" +include = "../../shared_tasks.toml" + +[tool.poe.tasks.mypy] +help = "Run MyPy for this package." +cmd = "mypy --config-file $POE_ROOT/pyproject.toml agent_framework_hyperlight" + +[tool.poe.tasks.test] +help = "Run the default unit test suite for this package." +cmd = 'pytest -m "not integration" --cov=agent_framework_hyperlight --cov-report=term-missing:skip-covered tests' + +[build-system] +requires = ["flit-core >= 3.11,<4.0"] +build-backend = "flit_core.buildapi" diff --git a/python/packages/hyperlight/samples/README.md b/python/packages/hyperlight/samples/README.md new file mode 100644 index 0000000000..18896c4aa3 --- /dev/null +++ b/python/packages/hyperlight/samples/README.md @@ -0,0 +1,16 @@ +# Hyperlight CodeAct samples + +These samples demonstrate the alpha `agent-framework-hyperlight` package. + +- `codeact_context_provider.py` shows the provider-owned CodeAct model where the + agent only sees `execute_code` and sandbox tools are owned by + `HyperlightCodeActProvider`. +- `codeact_tool.py` shows the standalone `HyperlightExecuteCodeTool` surface + where `execute_code` is added directly to the agent tool list. 
+ +Run the samples from the repository's `python/` directory after installing the workspace dependencies: + +```bash +uv run --directory packages/hyperlight python samples/codeact_context_provider.py +uv run --directory packages/hyperlight python samples/codeact_tool.py +``` diff --git a/python/packages/hyperlight/samples/codeact_context_provider.py b/python/packages/hyperlight/samples/codeact_context_provider.py new file mode 100644 index 0000000000..c0cc03c2f6 --- /dev/null +++ b/python/packages/hyperlight/samples/codeact_context_provider.py @@ -0,0 +1,192 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import logging +import os +from collections.abc import Awaitable, Callable +from typing import Annotated, Any, Literal + +from agent_framework import Agent, FunctionInvocationContext, function_middleware, tool +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv + +from agent_framework_hyperlight import HyperlightCodeActProvider + +"""This sample demonstrates the provider-owned Hyperlight CodeAct flow. + +The sample keeps `compute` and `fetch_data` off the direct agent tool surface and +registers them only with `HyperlightCodeActProvider`. The model therefore sees a +single `execute_code` tool and must call the provider-owned tools from inside +the sandbox with `call_tool(...)`. 
+""" + +load_dotenv() + +_CYAN = "\033[36m" +_YELLOW = "\033[33m" +_GREEN = "\033[32m" +_DIM = "\033[2m" +_RESET = "\033[0m" + + +class _ColoredFormatter(logging.Formatter): + """Dim logger output so it does not compete with sample prints.""" + + def format(self, record: logging.LogRecord) -> str: + return f"{_DIM}{super().format(record)}{_RESET}" + + +logging.basicConfig(level=logging.WARNING) +logging.getLogger().handlers[0].setFormatter( + _ColoredFormatter("[%(asctime)s] %(levelname)s: %(message)s"), +) + + +@function_middleware +async def log_function_calls( + context: FunctionInvocationContext, + call_next: Callable[[], Awaitable[None]], +) -> None: + """Log tool calls, including readable execute_code blocks.""" + import time + + function_name = context.function.name + arguments = context.arguments if isinstance(context.arguments, dict) else {} + + if function_name == "execute_code" and "code" in arguments: + print(f"\n{_YELLOW}{'─' * 60}") + print("▶ execute_code") + print(f"{'─' * 60}{_RESET}") + print(arguments["code"]) + print(f"{_YELLOW}{'─' * 60}{_RESET}") + else: + pairs = ", ".join(f"{name}={value!r}" for name, value in arguments.items()) + print(f"\n{_YELLOW}▶ {function_name}({pairs}){_RESET}") + + start = time.perf_counter() + await call_next() + elapsed = time.perf_counter() - start + + result = context.result + if function_name == "execute_code" and isinstance(result, list): + for item in result: + if item.type != "code_interpreter_tool_result": + continue + + for output in item.outputs or []: + if output.type == "text" and output.text: + print(f"{_GREEN}stdout:\n{output.text}{_RESET}") + if output.type == "error" and output.error_details: + print(f"{_YELLOW}stderr:\n{output.error_details}{_RESET}") + else: + print(f"{_YELLOW}◀ {function_name} → {result!r}{_RESET}") + + print(f"{_DIM} ({elapsed:.4f}s){_RESET}") + + +@tool(approval_mode="never_require") +def compute( + operation: Annotated[ + Literal["add", "subtract", "multiply", "divide"], + 
"Math operation: add, subtract, multiply, or divide.", + ], + a: Annotated[float, "First numeric operand."], + b: Annotated[float, "Second numeric operand."], +) -> float: + """Perform a math operation for sandboxed code.""" + operations = { + "add": a + b, + "subtract": a - b, + "multiply": a * b, + "divide": a / b if b else float("inf"), + } + return operations[operation] + + +@tool(approval_mode="never_require") +async def fetch_data( + table: Annotated[str, "Name of the simulated table to query."], +) -> list[dict[str, Any]]: + """Fetch records from a named table.""" + await asyncio.sleep(0.5) + data: dict[str, list[dict[str, Any]]] = { + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"}, + {"id": 3, "name": "Charlie", "role": "admin"}, + ], + "products": [ + {"id": 101, "name": "Widget", "price": 9.99}, + {"id": 102, "name": "Gadget", "price": 19.99}, + ], + } + return data.get(table, []) + + +async def main() -> None: + """Run the provider-owned Hyperlight CodeAct sample.""" + # 1. Create the Hyperlight-backed provider and register sandbox tools on it. + codeact = HyperlightCodeActProvider( + tools=[compute, fetch_data], + approval_mode="never_require", + ) + + # 2. Create the client and the agent. + agent = Agent( + client=FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["FOUNDRY_MODEL"], + credential=AzureCliCredential(), + ), + name="HyperlightCodeActProviderAgent", + instructions="You are a helpful assistant.", + context_providers=[codeact], + middleware=[log_function_calls], + ) + + # 3. Run a request that should use execute_code plus provider-owned tools. + query = ( + "Fetch all users, find admins, multiply 7*(3*2), and print the users, " + "admins, and multiplication result. Use execute_code and call_tool(...) " + "inside the sandbox." 
+ ) + print(f"{_CYAN}{'=' * 60}") + print("Hyperlight CodeAct provider sample") + print(f"{'=' * 60}{_RESET}") + print(f"{_CYAN}User: {query}{_RESET}") + result = await agent.run(query) + print(f"{_CYAN}Agent: {result.text}{_RESET}") + + +""" +Sample output (shape only): + +============================================================ +Hyperlight CodeAct provider sample +============================================================ +User: Fetch all users, find admins, multiply 7*(3*2), ... + +──────────────────────────────────────────────────────────── +▶ execute_code +──────────────────────────────────────────────────────────── +users = call_tool("fetch_data", table="users") +admins = [user for user in users if user["role"] == "admin"] +result = call_tool("compute", operation="multiply", a=7, b=6) +print("Users:", users) +print("Admins:", admins) +print("7 * 6 =", result) +──────────────────────────────────────────────────────────── +stdout: +Users: [...] +Admins: [...] +7 * 6 = 42.0 + (x.xxxxs) +Agent: ... +""" + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/packages/hyperlight/samples/codeact_tool.py b/python/packages/hyperlight/samples/codeact_tool.py new file mode 100644 index 0000000000..64c0e6fde5 --- /dev/null +++ b/python/packages/hyperlight/samples/codeact_tool.py @@ -0,0 +1,110 @@ +# Copyright (c) Microsoft. All rights reserved. + +from __future__ import annotations + +import asyncio +import os +from typing import Annotated, Any, Literal + +from agent_framework import Agent, tool +from agent_framework.foundry import FoundryChatClient +from azure.identity import AzureCliCredential +from dotenv import load_dotenv + +from agent_framework_hyperlight import HyperlightExecuteCodeTool + +"""This sample demonstrates the standalone Hyperlight execute_code tool. + +The sample adds `HyperlightExecuteCodeTool` directly to the agent. 
The tool's +own description advertises `call_tool(...)`, the registered sandbox tools, and +the current capability configuration, so no extra CodeAct-specific agent +instructions are required. +""" + +load_dotenv() + + +@tool(approval_mode="never_require") +def compute( + operation: Annotated[ + Literal["add", "subtract", "multiply", "divide"], + "Math operation: add, subtract, multiply, or divide.", + ], + a: Annotated[float, "First numeric operand."], + b: Annotated[float, "Second numeric operand."], +) -> float: + """Perform a math operation used by sandboxed code.""" + operations = { + "add": a + b, + "subtract": a - b, + "multiply": a * b, + "divide": a / b if b else float("inf"), + } + return operations[operation] + + +@tool(approval_mode="never_require") +def fetch_data( + table: Annotated[str, "Name of the simulated table to query."], +) -> list[dict[str, Any]]: + """Fetch simulated records from a named table.""" + data: dict[str, list[dict[str, Any]]] = { + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"}, + {"id": 3, "name": "Charlie", "role": "admin"}, + ], + "products": [ + {"id": 101, "name": "Widget", "price": 9.99}, + {"id": 102, "name": "Gadget", "price": 19.99}, + ], + } + return data.get(table, []) + + +async def main() -> None: + """Run the standalone execute_code sample.""" + # 1. Create the packaged execute_code tool and register sandbox tools on it. + execute_code = HyperlightExecuteCodeTool( + tools=[compute, fetch_data], + approval_mode="never_require", + ) + + # 2. Create the client and the agent. + agent = Agent( + client=FoundryChatClient( + project_endpoint=os.environ["FOUNDRY_PROJECT_ENDPOINT"], + model=os.environ["FOUNDRY_MODEL"], + credential=AzureCliCredential(), + ), + name="HyperlightExecuteCodeToolAgent", + instructions="You are a helpful assistant.", + tools=execute_code, + ) + + # 3. Run one request through the direct-tool surface. 
+ print("=" * 60) + print("Hyperlight execute_code tool sample") + print("=" * 60) + query = ( + "Fetch all users, find admins, multiply 6*7, and print the users, admins, " + "and multiplication result. Use one execute_code call." + ) + print(f"User: {query}") + result = await agent.run(query) + print(f"Agent: {result.text}") + + +""" +Sample output (shape only): + +============================================================ +Hyperlight execute_code tool sample +============================================================ +User: Fetch all users, find admins, multiply 6*7, ... +Agent: ... +""" + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/packages/hyperlight/tests/hyperlight/test_hyperlight_codeact.py b/python/packages/hyperlight/tests/hyperlight/test_hyperlight_codeact.py new file mode 100644 index 0000000000..e8db02eb99 --- /dev/null +++ b/python/packages/hyperlight/tests/hyperlight/test_hyperlight_codeact.py @@ -0,0 +1,522 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from __future__ import annotations + +import asyncio +import importlib.metadata +import importlib.util +import inspect +import json +import sys +import threading +from collections.abc import Awaitable, Callable, Iterator, Mapping, MutableSequence +from contextlib import contextmanager +from dataclasses import dataclass +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from typing import Any + +import pytest +from agent_framework import ( + Agent, + BaseChatClient, + ChatResponse, + ChatResponseUpdate, + Content, + FunctionInvocationLayer, + Message, + ResponseStream, + tool, +) + +from agent_framework_hyperlight import AllowedDomain, FileMount, HyperlightCodeActProvider, HyperlightExecuteCodeTool +from agent_framework_hyperlight import _execute_code_tool as execute_code_module + + +def _hyperlight_integration_skip_reason() -> str | None: + if sys.version_info >= (3, 14): + return ( + "Hyperlight integration tests require Python < 3.14 because hyperlight-sandbox-backend-wasm is unsupported." + ) + + if sys.platform not in {"linux", "win32"}: + return "Hyperlight integration tests require Linux or Windows runners." + + if importlib.util.find_spec("hyperlight_sandbox") is None: + return "hyperlight-sandbox is not installed." + + if importlib.util.find_spec("python_guest") is None: + return "hyperlight-sandbox-python-guest is not installed." + + try: + importlib.metadata.version("hyperlight-sandbox-backend-wasm") + except importlib.metadata.PackageNotFoundError: + return "hyperlight-sandbox-backend-wasm is not installed." 
+ + return None + + +skip_if_hyperlight_integration_tests_disabled = pytest.mark.skipif( + (reason := _hyperlight_integration_skip_reason()) is not None, + reason=reason or "Hyperlight integration tests are disabled.", +) + + +@tool(approval_mode="never_require") +def compute(a: int, b: int) -> int: + return a + b + + +@tool(approval_mode="always_require") +def dangerous_compute(a: int, b: int) -> int: + return a * b + + +@tool(name="compute", approval_mode="always_require") +def replacement_compute(a: int, b: int) -> int: + return a - b + + +@dataclass(slots=True) +class _FakeResult: + success: bool + stdout: str = "" + stderr: str = "" + + +def _run_in_thread(callback: Callable[[], Any]) -> Any: + result: dict[str, Any] = {} + error: dict[str, BaseException] = {} + + def _runner() -> None: + try: + result["value"] = callback() + except BaseException as exc: + error["value"] = exc + + thread = threading.Thread(target=_runner) + thread.start() + thread.join() + + if "value" in error: + raise error["value"] + + return result.get("value") + + +class _FakeSandbox: + instances: list[_FakeSandbox] = [] + + def __init__( + self, + *, + input_dir: str | None = None, + output_dir: str | None = None, + temp_output: bool = False, + backend: str = "wasm", + module: str | None = None, + module_path: str | None = None, + heap_size: str | None = None, + stack_size: str | None = None, + ) -> None: + self.input_dir = input_dir + self.output_dir = output_dir + self.registered_tools: dict[str, Any] = {} + self.allowed_domains: list[tuple[str, list[str] | None]] = [] + self.restore_calls: list[Any] = [] + self.output_files: list[str] = [] + _FakeSandbox.instances.append(self) + + def register_tool(self, name_or_tool: Any, callback: Any | None = None) -> None: + if callback is None: + raise AssertionError("Expected callback registration for sandbox tools.") + self.registered_tools[str(name_or_tool)] = callback + + def allow_domain(self, target: str, methods: list[str] | None = None) 
-> None: + self.allowed_domains.append((target, methods)) + + def _invoke_tool(self, name: str, **kwargs: Any) -> Any: + callback = self.registered_tools[name] + if inspect.iscoroutinefunction(callback): + return _run_in_thread(lambda: asyncio.run(callback(**kwargs))) + + result = callback(**kwargs) + if inspect.isawaitable(result): + return _run_in_thread(lambda: asyncio.run(result)) + return result + + def run(self, code: str) -> _FakeResult: + if code == "None": + return _FakeResult(success=True) + if code == "create-output": + if self.output_dir is None: + raise AssertionError("Expected output directory for create-output test.") + Path(self.output_dir, "report.txt").write_text("artifact", encoding="utf-8") + self.output_files = ["report.txt"] + return _FakeResult(success=True, stdout="done\n") + if 'call_tool("compute", a=20, b=22)' in code: + total = self._invoke_tool("compute", a=20, b=22) + return _FakeResult(success=True, stdout=f"{total}\n") + return _FakeResult(success=False, stderr="sandbox boom") + + def snapshot(self) -> str: + return "snapshot" + + def restore(self, snapshot: Any) -> None: + self.restore_calls.append(snapshot) + + def get_output_files(self) -> list[str]: + return list(self.output_files) + + +class _FakeRuntime: + def __init__(self) -> None: + self.calls: list[tuple[Any, str]] = [] + + def execute(self, *, config: Any, code: str) -> list[Content]: + self.calls.append((config, code)) + return [Content.from_text("ok")] + + +class _FakeSessionContext: + def __init__(self, *, tools: list[Any] | None = None) -> None: + self.options: dict[str, Any] = {} + if tools is not None: + self.options["tools"] = tools + self.instructions: list[tuple[str, str]] = [] + self.tools: list[tuple[str, list[Any]]] = [] + + def extend_instructions(self, source_id: str, instructions: str) -> None: + self.instructions.append((source_id, instructions)) + + def extend_tools(self, source_id: str, tools: list[Any]) -> None: + self.tools.append((source_id, tools)) + 
+ +def _extract_execute_code_result(function_result: Content) -> Content: + assert function_result.type == "function_result" + assert function_result.exception is None, ( + f"execute_code raised {function_result.exception!r} with items={function_result.items!r}" + ) + + code_result = next( + (item for item in function_result.items or [] if item.type == "code_interpreter_tool_result"), + None, + ) + if code_result is not None: + return code_result + + text_outputs = [item for item in function_result.items or [] if item.type == "text"] + if text_outputs: + return Content.from_code_interpreter_tool_result(outputs=text_outputs) + + if function_result.result: + return Content.from_code_interpreter_tool_result(outputs=[Content.from_text(function_result.result)]) + + raise AssertionError(f"execute_code returned no usable outputs: {function_result.items!r}") + + +def _extract_text_output(result_content: Content) -> str: + code_result = _extract_execute_code_result(result_content) + text_output = next( + (item for item in code_result.outputs or [] if item.type == "text" and item.text is not None), None + ) + assert text_output is not None and text_output.text is not None, ( + f"Expected text output from execute_code, got {code_result.outputs!r}" + ) + return text_output.text + + +@contextmanager +def _serve_http_text_response(body: bytes) -> Iterator[tuple[str, list[str]]]: + requests: list[str] = [] + + class _Handler(BaseHTTPRequestHandler): + def do_GET(self) -> None: # noqa: N802 + requests.append(self.path) + self.send_response(200) + self.send_header("Content-Type", "text/plain; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, format: str, *args: Any) -> None: + return + + server = ThreadingHTTPServer(("127.0.0.1", 0), _Handler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + + try: + yield f"127.0.0.1:{server.server_port}", requests + 
finally: + server.shutdown() + server.server_close() + thread.join() + + +class _FakeCodeActChatClient(FunctionInvocationLayer[Any], BaseChatClient[Any]): + def __init__(self) -> None: + FunctionInvocationLayer.__init__(self) + BaseChatClient.__init__(self) + self.call_count = 0 + + def _inner_get_response( + self, + *, + messages: MutableSequence[Message], + stream: bool, + options: Mapping[str, Any], + **kwargs: Any, + ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]: + if stream: + raise AssertionError("Streaming is not used in this integration test.") + + async def _get_response() -> ChatResponse: + self.call_count += 1 + + if self.call_count == 1: + return ChatResponse( + messages=Message( + role="assistant", + contents=[ + Content.from_function_call( + call_id="execute_code_call", + name="execute_code", + arguments={ + "code": 'total = call_tool("compute", a=20, b=22)\nprint(total)', + }, + ) + ], + ) + ) + + function_results = [ + content for message in messages for content in message.contents if content.type == "function_result" + ] + assert len(function_results) == 1 + + result_content = function_results[0] + assert result_content.call_id == "execute_code_call" + assert _extract_text_output(result_content) == "42\n" + + return ChatResponse(messages=Message(role="assistant", contents=["The sandbox returned 42."])) + + return _get_response() + + +def test_execute_code_tool_updates_approval_with_managed_tools() -> None: + execute_code = HyperlightExecuteCodeTool(tools=[compute], _registry=_FakeRuntime()) + assert execute_code.approval_mode == "never_require" + + execute_code.add_tools([dangerous_compute]) + assert execute_code.approval_mode == "always_require" + + +def test_execute_code_tool_replaces_tools_with_the_same_name() -> None: + execute_code = HyperlightExecuteCodeTool(tools=[compute], _registry=_FakeRuntime()) + + execute_code.add_tools(replacement_compute) + + tools = execute_code.get_tools() + assert len(tools) == 1 
+ assert tools[0] is replacement_compute + assert execute_code.approval_mode == "always_require" + + +def test_execute_code_tool_accepts_string_and_tuple_file_mounts_without_mode_flags( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + shorthand_file = tmp_path / "notes.txt" + shorthand_file.write_text("hello", encoding="utf-8") + explicit_file = tmp_path / "data.json" + explicit_file.write_text('{"hello": "world"}', encoding="utf-8") + monkeypatch.chdir(tmp_path) + + execute_code = HyperlightExecuteCodeTool(_registry=_FakeRuntime()) + execute_code.add_file_mounts("notes.txt") + execute_code.add_file_mounts((explicit_file, "data/data.json")) + + assert execute_code.get_file_mounts() == [ + FileMount(shorthand_file.resolve(), "/input/notes.txt"), + FileMount(explicit_file.resolve(), "/input/data/data.json"), + ] + + +def test_execute_code_tool_allowed_domains_use_structured_entries_and_replace_by_target() -> None: + execute_code = HyperlightExecuteCodeTool(_registry=_FakeRuntime()) + + execute_code.add_allowed_domains(["https://api.example.com/v1", ("github.com", "get")]) + execute_code.add_allowed_domains([ + AllowedDomain("api.example.com", ("post", "get")), + ("github.com", ["head", "get"]), + ]) + + assert execute_code.get_allowed_domains() == [ + AllowedDomain("api.example.com", ("GET", "POST")), + AllowedDomain("github.com", ("GET", "HEAD")), + ] + + +def test_execute_code_tool_description_contains_call_tool_guidance(tmp_path: Path) -> None: + workspace_root = tmp_path / "workspace" + workspace_root.mkdir() + (workspace_root / "notes.txt").write_text("hello", encoding="utf-8") + mount_file = tmp_path / "data.json" + mount_file.write_text('{"hello": "world"}', encoding="utf-8") + + execute_code = HyperlightExecuteCodeTool( + tools=[compute], + workspace_root=workspace_root, + file_mounts=[FileMount(str(mount_file), "data/data.json")], + allowed_domains=[AllowedDomain("https://api.example.com/v1", ("get", "post")), "github.com"], + 
_registry=_FakeRuntime(), + ) + + description = execute_code.description + + assert "call_tool(name, **kwargs)" in description + assert "compute" in description + assert "/input/data/data.json" in description + assert "/output" in description + assert "api.example.com" in description + assert "GET, POST" in description + assert "github.com" in description + + +async def test_execute_code_tool_executes_with_structured_content(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox) + + execute_code = HyperlightExecuteCodeTool( + tools=[compute], + file_mounts=[FileMount(Path(__file__), "fixtures/source.py")], + allowed_domains=[("api.example.com", "get")], + ) + + result = await execute_code.invoke(arguments={"code": "create-output"}) + + assert result[0].type == "code_interpreter_tool_result" + assert result[0].outputs is not None + assert result[0].outputs[0].type == "text" + assert result[0].outputs[0].text == "done\n" + assert any(item.type == "data" for item in result[0].outputs) + assert _FakeSandbox.instances[0].allowed_domains == [("api.example.com", ["GET"])] + assert "compute" in _FakeSandbox.instances[0].registered_tools + + +async def test_execute_code_tool_failure_returns_error_content(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox) + + execute_code = HyperlightExecuteCodeTool() + result = await execute_code.invoke(arguments={"code": "fail"}) + + assert result[0].type == "code_interpreter_tool_result" + assert result[0].outputs is not None + assert result[0].outputs[0].type == "error" + assert result[0].outputs[0].error_details == "sandbox boom" + + +async def test_provider_injects_run_scoped_execute_code_tool() -> None: + runtime = _FakeRuntime() + provider = HyperlightCodeActProvider(tools=[compute], _registry=runtime) + context = 
_FakeSessionContext(tools=[dangerous_compute]) + state: dict[str, Any] = {} + + await provider.before_run(agent=object(), session=None, context=context, state=state) + + assert context.options["tools"] == [dangerous_compute] + assert len(context.instructions) == 1 + assert len(context.tools) == 1 + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + assert run_tool.approval_mode == "never_require" + assert [tool_obj.name for tool_obj in run_tool.get_tools()] == ["compute"] + assert "dangerous_compute" not in context.instructions[0][1] + assert "compute" not in context.instructions[0][1] + assert "Filesystem capabilities:" not in context.instructions[0][1] + assert state[provider.source_id]["tool_names"] == ["compute"] + assert state[provider.source_id]["approval_mode"] == "never_require" + json.dumps(state) + + provider.remove_tool("compute") + assert [tool_obj.name for tool_obj in run_tool.get_tools()] == ["compute"] + + +async def test_agent_runs_hyperlight_codeact_end_to_end_with_fake_sandbox(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSandbox.instances.clear() + monkeypatch.setattr(execute_code_module, "_load_sandbox_class", lambda: _FakeSandbox) + + client = _FakeCodeActChatClient() + provider = HyperlightCodeActProvider(tools=[compute]) + agent = Agent(client=client, context_providers=[provider]) + + response = await agent.run("Use the sandbox to add 20 and 22.") + + assert response.text == "The sandbox returned 42." 
+ assert client.call_count == 2 + assert len(_FakeSandbox.instances) == 1 + assert "compute" in _FakeSandbox.instances[0].registered_tools + + +@pytest.mark.integration +@skip_if_hyperlight_integration_tests_disabled +async def test_agent_runs_hyperlight_codeact_end_to_end_with_real_sandbox() -> None: + client = _FakeCodeActChatClient() + provider = HyperlightCodeActProvider(tools=[compute]) + agent = Agent(client=client, context_providers=[provider]) + + response = await agent.run("Use the sandbox to add 20 and 22.") + + assert response.text == "The sandbox returned 42." + assert client.call_count == 2 + + +@pytest.mark.integration +@skip_if_hyperlight_integration_tests_disabled +async def test_provider_run_tool_reads_writes_files_and_accesses_allowed_url_with_real_sandbox( + tmp_path: Path, +) -> None: + mounted_file = tmp_path / "mounted.txt" + mounted_file.write_text("hello from mount", encoding="utf-8") + + with _serve_http_text_response(b"network ok") as (allowed_host, requests): + provider = HyperlightCodeActProvider() + provider.add_file_mounts((mounted_file, "data/input.txt")) + provider.add_allowed_domains((allowed_host, "GET")) + + context = _FakeSessionContext() + state: dict[str, Any] = {} + await provider.before_run(agent=object(), session=None, context=context, state=state) + + run_tool = context.tools[0][1][0] + assert isinstance(run_tool, HyperlightExecuteCodeTool) + + result = await run_tool.invoke( + arguments={ + "code": ( + "from pathlib import Path\n" + "from urllib.request import urlopen\n\n" + 'input_text = Path("/input/data/input.txt").read_text(encoding="utf-8")\n' + 'Path("/output/result.txt").write_text(input_text.upper(), encoding="utf-8")\n' + f'with urlopen("http://{allowed_host}/allowed", timeout=10) as response:\n' + ' network_text = response.read().decode("utf-8")\n' + "print(input_text)\n" + "print(network_text)\n" + ) + } + ) + + assert result[0].type == "code_interpreter_tool_result" + outputs = result[0].outputs or [] + + 
text_output = next(item for item in outputs if item.type == "text" and item.text is not None) + assert text_output.text == "hello from mount\nnetwork ok\n" + + file_output = next(item for item in outputs if item.type == "data") + assert file_output.data == b"HELLO FROM MOUNT" + assert file_output.additional_properties["path"] == "/output/result.txt" + assert requests == ["/allowed"] diff --git a/python/pyproject.toml b/python/pyproject.toml index 92b37b57c7..4cd9dd37e8 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -79,6 +79,7 @@ agent-framework-devui = { workspace = true } agent-framework-durabletask = { workspace = true } agent-framework-foundry = { workspace = true } agent-framework-foundry-local = { workspace = true } +agent-framework-hyperlight = { workspace = true } agent-framework-lab = { workspace = true } agent-framework-mem0 = { workspace = true } agent-framework-ollama = { workspace = true } diff --git a/python/uv.lock b/python/uv.lock index c22522d898..5028347ed2 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -44,6 +44,7 @@ members = [ "agent-framework-foundry", "agent-framework-foundry-local", "agent-framework-github-copilot", + "agent-framework-hyperlight", "agent-framework-lab", "agent-framework-mem0", "agent-framework-ollama", @@ -529,6 +530,25 @@ requires-dist = [ { name = "github-copilot-sdk", marker = "python_full_version >= '3.11'", specifier = ">=0.2.1,<=0.2.1" }, ] +[[package]] +name = "agent-framework-hyperlight" +version = "1.0.0a260409" +source = { editable = "packages/hyperlight" } +dependencies = [ + { name = "agent-framework-core", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, + { name = "hyperlight-sandbox", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, + { name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and sys_platform == 'linux') or (python_full_version < '3.14' and sys_platform == 
'win32')" }, + { name = "hyperlight-sandbox-python-guest", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, +] + +[package.metadata] +requires-dist = [ + { name = "agent-framework-core", editable = "packages/core" }, + { name = "hyperlight-sandbox", specifier = ">=0.3.0,<0.4" }, + { name = "hyperlight-sandbox-backend-wasm", marker = "(python_full_version < '3.14' and sys_platform == 'linux') or (python_full_version < '3.14' and sys_platform == 'win32')", specifier = ">=0.3.0,<0.4" }, + { name = "hyperlight-sandbox-python-guest", specifier = ">=0.3.0,<0.4" }, +] + [[package]] name = "agent-framework-lab" version = "1.0.0b260409" @@ -2677,6 +2697,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, ] +[[package]] +name = "hyperlight-sandbox" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/fe/ce88996ea3e3e05130d6f0e8cd2ffbe9ab9bf3d9448b7050d4b8d0802b0a/hyperlight_sandbox-0.3.0.tar.gz", hash = "sha256:00491ce267ffbdb206377c79b4afd86510177ad73f4daf2ef7fce02b54eaf801", size = 9251, upload-time = "2026-04-07T03:49:52.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/33/e6dcd6729308d13570ae2d3be0e476019a6f3fea387d7549bb1f77ce0408/hyperlight_sandbox-0.3.0-py3-none-any.whl", hash = "sha256:ba8e6779d64e9c187acd93456851ebafaed2f49380e5d132bc0906a4080d2217", size = 5723, upload-time = "2026-04-07T03:49:53.276Z" }, +] + +[[package]] +name = "hyperlight-sandbox-backend-wasm" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/58/91/c9d68cad7996fdd2f1facef1453156bdd8d52eefa976cc8c827c13029497/hyperlight_sandbox_backend_wasm-0.3.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:eda362f5f737b0823326290d7627c76ce0547a78e70f07f8c9d177e34622fc02", size = 3806454, upload-time = "2026-04-07T03:49:24.238Z" }, + { url = "https://files.pythonhosted.org/packages/9a/6f/6b2399a1caf59dd19b635d99ee1add0c975af7bc3317f5d0f1f9c3f90aa0/hyperlight_sandbox_backend_wasm-0.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:79347b7ae94f2786691b04cb52130dabc5991e0c03b42a24bad8adc766832655", size = 3283951, upload-time = "2026-04-07T03:49:17.137Z" }, + { url = "https://files.pythonhosted.org/packages/23/f2/b380c34a0ce8d486a05adb66757f98cca029e1fb1c96b1c29be0d25d3882/hyperlight_sandbox_backend_wasm-0.3.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:aff9eec4803fb535a140298e2632529f4150fcf3c6ea3ff2ae4571572a836116", size = 3806601, upload-time = "2026-04-07T03:49:22.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/5a/fb78cfd934e0523887b8d5b073b7b2aed3b545add21cda3aa95929ac1659/hyperlight_sandbox_backend_wasm-0.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:b6151704dd19862c9869b115752b4504b45d0b2eeb46aa9385a1a3b8be11cfa8", size = 3284164, upload-time = "2026-04-07T03:49:18.556Z" }, + { url = "https://files.pythonhosted.org/packages/21/bc/4e21f5c7ccd9307ac63a61c71b62a57ee4a9e6eec77fc72ff072907a21f5/hyperlight_sandbox_backend_wasm-0.3.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:cfd1d22ce221774d82a5174d268d56ff70fc1a23fb993a6491358b5d0ed169bf", size = 3802901, upload-time = "2026-04-07T03:49:19.845Z" }, + { url = "https://files.pythonhosted.org/packages/9a/41/646be9b0c7bb0f9192e45a77414673aa414eb316c92b5312efe6fb4ce802/hyperlight_sandbox_backend_wasm-0.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:229ab494a422f2de895a2a27ad6a6a2daed710ea062d7c213878bbe5f5b32fa7", size = 3281220, upload-time = "2026-04-07T03:49:21.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/3a/f8ec4a41fffba4036dfc3cbddc3dfb6e87466b01afe1cb0a50cc6a0f0eed/hyperlight_sandbox_backend_wasm-0.3.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b91905ee2ddd36a78b0dd13b1a62be99a995a45121587c111692591e40b36912", size = 3802789, upload-time = "2026-04-07T03:49:15.614Z" }, + { url = "https://files.pythonhosted.org/packages/3c/62/dfa8c15102f9b8ec5c3b5ffb54b99d60c75e7a6e4d00540757656bc5a5d8/hyperlight_sandbox_backend_wasm-0.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:eff682761c3b86abfe7e0d523ea0e6d5c7e8299302917c53918743b82c9d1ea2", size = 3280501, upload-time = "2026-04-07T03:49:13.939Z" }, +] + +[[package]] +name = "hyperlight-sandbox-python-guest" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/23/6a/f182c4315d31a98dd3b82f9274638e3adb399779584af93c5087bb2f814f/hyperlight_sandbox_python_guest-0.3.0.tar.gz", hash = "sha256:b1de5d8e87375dc6bef744ecd7ae2a7f43d5f6b913b4e990e9872bd439c0b19e", size = 21554625, upload-time = "2026-04-07T03:49:42.672Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/8e/4cd754928464f56528645c7421ccbb3fcbe45ad2542f899712b0f2f2c0e1/hyperlight_sandbox_python_guest-0.3.0-py3-none-any.whl", hash = "sha256:3c55a7420666ad9a208893dbdf7ad1b5c8ad4f3a94b1a56e64979719c7ce95c1", size = 21716481, upload-time = "2026-04-07T03:49:39.885Z" }, +] + [[package]] name = "idna" version = "3.11"