diff --git a/.github/agents/agentic-workflows.agent.md b/.github/agents/agentic-workflows.agent.md index 776cdea..d94ac15 100644 --- a/.github/agents/agentic-workflows.agent.md +++ b/.github/agents/agentic-workflows.agent.md @@ -97,5 +97,5 @@ ado-aw check my-agent.yml - Agent files must be compiled with `ado-aw compile` after frontmatter changes - Markdown body changes don't require recompilation -- Follow security best practices: minimal permissions, explicit `network.allow`, scoped service connections +- Follow security best practices: minimal permissions, explicit `network.allowed`, scoped service connections - Reference full docs at the [AGENTS.md](../../AGENTS.md) in this repo diff --git a/.github/workflows/update-awf-version.lock.yml b/.github/workflows/update-awf-version.lock.yml index 3d85347..f90eae1 100644 --- a/.github/workflows/update-awf-version.lock.yml +++ b/.github/workflows/update-awf-version.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"cb8cbfd520e7a77629b57d7de3c1bf9f73b1ed875bf268c72f055eac67a102f3","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"206b4b4fc88867928fa4ef288b770b224cd48404685eca0c639ddd4db3a72525","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} # gh-aw-manifest: 
{"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_CI_TRIGGER_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc","version":"v0.68.1"}]} # ___ _ _ # / _ \ | | (_) @@ -22,7 +22,7 @@ # # For more information: https://github.github.com/gh-aw/introduction/overview/ # -# Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and opens PRs to update pinned version constants +# Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and syncs ecosystem_domains.json from gh-aw. Opens PRs for any updates found. 
# # Secrets used: # - COPILOT_GITHUB_TOKEN @@ -158,19 +158,19 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' - GH_AW_PROMPT_45895f8f0ea189d4_EOF + GH_AW_PROMPT_168313d4fdec629b_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' - Tools: create_pull_request(max:3), missing_tool, missing_data, noop - GH_AW_PROMPT_45895f8f0ea189d4_EOF + Tools: create_pull_request(max:4), missing_tool, missing_data, noop + GH_AW_PROMPT_168313d4fdec629b_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_create_pull_request.md" - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' The following GitHub context information is available for this workflow: @@ -200,12 +200,12 @@ jobs: {{/if}} - GH_AW_PROMPT_45895f8f0ea189d4_EOF + GH_AW_PROMPT_168313d4fdec629b_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' {{#runtime-import .github/workflows/update-awf-version.md}} - GH_AW_PROMPT_45895f8f0ea189d4_EOF + GH_AW_PROMPT_168313d4fdec629b_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 @@ -371,15 +371,15 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_8ca098a8137f403d_EOF' - 
{"create_pull_request":{"max":3,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"]},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} - GH_AW_SAFE_OUTPUTS_CONFIG_8ca098a8137f403d_EOF + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_0dee48e36c1f8b4f_EOF' + {"create_pull_request":{"max":4,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"]},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} + GH_AW_SAFE_OUTPUTS_CONFIG_0dee48e36c1f8b4f_EOF - name: Write Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | { "description_suffixes": { - "create_pull_request": " CONSTRAINTS: Maximum 3 pull request(s) can be created." 
+ "create_pull_request": " CONSTRAINTS: Maximum 4 pull request(s) can be created." }, "repo_params": {}, "dynamic_tools": [] @@ -571,7 +571,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_8fe4e51e6e54b1d2_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" + cat << GH_AW_MCP_CONFIG_3a914e19b96138f1_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" { "mcpServers": { "github": { @@ -612,7 +612,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_8fe4e51e6e54b1d2_EOF + GH_AW_MCP_CONFIG_3a914e19b96138f1_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -1007,7 +1007,7 @@ jobs: uses: 
actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 env: WORKFLOW_NAME: "Dependency Version Updater" - WORKFLOW_DESCRIPTION: "Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and opens PRs to update pinned version constants" + WORKFLOW_DESCRIPTION: "Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and syncs ecosystem_domains.json from gh-aw. Opens PRs for any updates found." HAS_PATCH: ${{ needs.agent.outputs.has_patch }} with: script: | @@ -1172,7 +1172,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: 
"{\"create_pull_request\":{\"max\":3,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_path_prefixes\":[\".github/\",\".agents/\"]},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"report_incomplete\":{}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_pull_request\":{\"max\":4,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_path_prefixes\":[\".github/\",\".agents/\"]},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"report_incomplete\":{}}" GH_AW_CI_TRIGGER_TOKEN: ${{ secrets.GH_AW_CI_TRIGGER_TOKEN }} with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} diff --git 
a/.github/workflows/update-awf-version.md b/.github/workflows/update-awf-version.md index d7ba9a0..6a68205 100644 --- a/.github/workflows/update-awf-version.md +++ b/.github/workflows/update-awf-version.md @@ -1,7 +1,7 @@ --- on: schedule: daily -description: Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and opens PRs to update pinned version constants +description: Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and syncs ecosystem_domains.json from gh-aw. Opens PRs for any updates found. permissions: contents: read issues: read @@ -13,7 +13,7 @@ network: allowed: [defaults] safe-outputs: create-pull-request: - max: 3 + max: 4 --- # Dependency Version Updater @@ -22,21 +22,22 @@ You are a dependency maintenance bot for the **ado-aw** project — a Rust CLI c ## Your Task -Check whether pinned version constants in `src/compile/common.rs` are up to date with the latest releases of their upstream dependencies. For each outdated constant, open a PR to update it. +Check whether pinned version constants in `src/compile/common.rs` are up to date with the latest releases of their upstream dependencies, and whether `src/data/ecosystem_domains.json` matches the upstream source. For each outdated item, open a PR to update it. 
-There are three dependencies to check: +There are four items to check: -| Constant | Upstream Repository | Example value | -|----------|-------------------|---------------| -| `AWF_VERSION` | [github/gh-aw-firewall](https://github.com/github/gh-aw-firewall) | `0.25.14` | -| `COPILOT_CLI_VERSION` | [github/copilot-cli](https://github.com/github/copilot-cli) | `1.0.6` | -| `MCPG_VERSION` | [github/gh-aw-mcpg](https://github.com/github/gh-aw-mcpg) | `0.1.9` | +| Item | Upstream Source | Local Path | +|------|---------------|------------| +| `AWF_VERSION` | [github/gh-aw-firewall](https://github.com/github/gh-aw-firewall) latest release | `src/compile/common.rs` | +| `COPILOT_CLI_VERSION` | [github/copilot-cli](https://github.com/github/copilot-cli) latest release | `src/compile/common.rs` | +| `MCPG_VERSION` | [github/gh-aw-mcpg](https://github.com/github/gh-aw-mcpg) latest release | `src/compile/common.rs` | +| `ecosystem_domains.json` | [github/gh-aw](https://github.com/github/gh-aw) `pkg/workflow/data/ecosystem_domains.json` on `main` | `src/data/ecosystem_domains.json` | -Run the following steps **independently for each dependency**. One may be up to date while the other is not. +Run the following steps **independently for each item**. One may be up to date while another is not. --- -## For each dependency: +## For AWF_VERSION, COPILOT_CLI_VERSION, MCPG_VERSION: ### Step 1: Get the Latest Release @@ -115,3 +116,53 @@ If the latest version is newer than the current constant: ``` - **Base branch**: `main` + +--- + +## For ecosystem_domains.json: + +### Step 1: Fetch the Upstream File + +Read the file `pkg/workflow/data/ecosystem_domains.json` from the `main` branch of [github/gh-aw](https://github.com/github/gh-aw). + +### Step 2: Read the Local File + +Read `src/data/ecosystem_domains.json` in this repository. + +### Step 3: Merge and Compare + +Our local file may contain **additional entries** that do not exist upstream (e.g., `"lean"`). 
These are ado-aw-specific additions and must be preserved. + +Merge the two files as follows: +- Start with all entries from the **upstream** file (updating any existing keys to match upstream values). +- **Add back** any keys that exist in the local file but **not** in the upstream file. These are ado-aw-specific entries. +- Maintain alphabetical key ordering in the final JSON. + +If the merged result is identical to the current local file, **skip** — everything is up to date. + +Before proceeding to Step 4, also check whether an open PR with the title `chore: sync ecosystem_domains.json from gh-aw` already exists. If one does, **skip** to avoid duplicates. + +### Step 4: Create a Sync PR + +If the merged result differs from the current local file: + +1. Write the merged JSON to `src/data/ecosystem_domains.json` (preserve 2-space indentation, one key per line, trailing newline). + +2. Create a pull request: + +- **Title**: `chore: sync ecosystem_domains.json from gh-aw` +- **Body**: + ```markdown + ## Ecosystem Domains Sync + + Merges upstream changes from [`github/gh-aw/pkg/workflow/data/ecosystem_domains.json`](https://github.com/github/gh-aw/blob/main/pkg/workflow/data/ecosystem_domains.json) into `src/data/ecosystem_domains.json`. + + This sync preserves any ado-aw-specific entries (keys not present upstream) while updating all shared entries to match the upstream source. + + This file defines the domain allowlists for ecosystem identifiers (e.g., `python`, `rust`, `node`) used in the `network.allowed` front matter field. 
+ + --- + *This PR was opened automatically by the dependency version updater workflow.* + ``` + +- **Base branch**: `main` diff --git a/AGENTS.md b/AGENTS.md index cb051ba..58ed091 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,6 +21,9 @@ Alongside the correctly generated pipeline yaml, an agent file is generated from ├── src/ │ ├── main.rs # Entry point with clap CLI │ ├── allowed_hosts.rs # Core network allowlist definitions +│ ├── ecosystem_domains.rs # Ecosystem domain lookups (python, rust, node, etc.) +│ ├── data/ +│ │ └── ecosystem_domains.json # Ecosystem domain lists (synced from gh-aw) │ ├── compile/ # Pipeline compilation module │ │ ├── mod.rs # Module entry point and Compiler trait │ │ ├── common.rs # Shared helpers across targets @@ -190,9 +193,10 @@ teardown: # separate job AFTER safe outputs processing - bash: echo "Teardown job step" displayName: "Teardown step" network: # optional network policy (standalone target only) - allow: # additional allowed host patterns - - "*.mycompany.com" - blocked: # blocked host patterns (removes exact entries from the allow list) + allowed: # allowed host patterns and/or ecosystem identifiers + - python # ecosystem identifier — expands to Python/PyPI domains + - "*.mycompany.com" # raw domain pattern + blocked: # blocked host patterns or ecosystems (removes from allow list) - "evil.example.com" permissions: # optional ADO access token configuration read: my-read-arm-connection # ARM service connection for read-only ADO access (Stage 1 agent) @@ -784,7 +788,8 @@ If no passthrough env vars are needed, this marker is replaced with an empty str Should be replaced with the comma-separated domain list for AWF's `--allow-domains` flag. The list includes: 1. Core Azure DevOps/GitHub endpoints (from `allowed_hosts.rs`) 2. MCP-specific endpoints for each enabled MCP -3. User-specified additional hosts from `network.allow:` front matter +3. 
Ecosystem identifier expansions from `network.allowed:` (e.g., `python` → PyPI/pip domains) +4. User-specified additional hosts from `network.allowed:` front matter The output is formatted as a comma-separated string (e.g., `github.com,*.dev.azure.com,api.github.com`). @@ -1603,7 +1608,7 @@ mcp-servers: permissions: read: my-read-arm-connection network: - allow: + allowed: - "dev.azure.com" - "*.dev.azure.com" ``` @@ -1614,7 +1619,7 @@ network: 2. **Containerization**: Stdio MCP servers run as isolated Docker containers (per MCPG spec §3.2.1) 3. **Environment Isolation**: MCP containers are spawned by MCPG with only the configured environment variables 4. **MCPG Gateway**: All MCP traffic flows through the MCP Gateway which enforces tool-level filtering -5. **Network Isolation**: MCP containers run within the same AWF-isolated network. Users must explicitly allow external domains via `network.allow` +5. **Network Isolation**: MCP containers run within the same AWF-isolated network. Users must explicitly allow external domains via `network.allowed` ## Network Isolation (AWF) @@ -1664,26 +1669,60 @@ The following domains are always allowed (defined in `allowed_hosts.rs`): ### Adding Additional Hosts -Agents can specify additional allowed hosts in their front matter: +Agents can specify additional allowed hosts in their front matter using either ecosystem identifiers or raw domain patterns: ```yaml network: - allow: - - "*.mycompany.com" - - "api.external-service.com" + allowed: + - python # Ecosystem identifier — expands to Python/PyPI domains + - rust # Ecosystem identifier — expands to Rust/crates.io domains + - "*.mycompany.com" # Raw domain pattern + - "api.external-service.com" # Raw domain ``` -All hosts (core + MCP-specific + user-specified) are combined into a comma-separated domain list passed to AWF's `--allow-domains` flag. 
+#### Ecosystem Identifiers + +Ecosystem identifiers are shorthand names that expand to curated domain lists for common language ecosystems and services. The domain lists are sourced from [gh-aw](https://github.com/github/gh-aw) and kept up to date via an automated workflow. + +Available ecosystem identifiers include: + +| Identifier | Includes | +|------------|----------| +| `defaults` | Certificate infrastructure, Ubuntu mirrors, common package registries | +| `github` | GitHub domains (`github.com`, `*.githubusercontent.com`, etc.) | +| `local` | Loopback addresses (`localhost`, `127.0.0.1`, `::1`) | +| `containers` | Docker Hub, GHCR, Quay, Kubernetes | +| `linux-distros` | Debian, Alpine, Fedora, CentOS, Arch Linux package repositories | +| `dev-tools` | CI/CD and developer tool services (Codecov, Shields.io, Snyk, etc.) | +| `python` | PyPI, pip, Conda, Anaconda | +| `rust` | crates.io, rustup, static.rust-lang.org | +| `node` | npm, Yarn, pnpm, Bun, Deno, Node.js | +| `go` | proxy.golang.org, pkg.go.dev, Go module proxy | +| `java` | Maven Central, Gradle, JDK downloads | +| `dotnet` | NuGet, .NET SDK | +| `ruby` | RubyGems, Bundler | +| `swift` | Swift.org, CocoaPods | +| `terraform` | HashiCorp releases, Terraform registry | + +Additional ecosystems: `bazel`, `chrome`, `clojure`, `dart`, `deno`, `elixir`, `fonts`, `github-actions`, `haskell`, `julia`, `kotlin`, `lua`, `node-cdns`, `ocaml`, `perl`, `php`, `playwright`, `powershell`, `r`, `scala`, `zig`. + +The full domain lists are defined in `src/data/ecosystem_domains.json`. + +All hosts (core + MCP-specific + ecosystem expansions + user-specified) are combined into a comma-separated domain list passed to AWF's `--allow-domains` flag. #### Blocking Hosts -The `network.blocked` field removes hosts from the combined allowlist using **exact-string matching**. Blocking `"github.com"` removes only that exact entry — it does **not** remove wildcard variants like `"*.github.com"`. 
To fully block a domain and its subdomains, list both the exact host and the wildcard pattern: +The `network.blocked` field removes hosts from the combined allowlist. Both ecosystem identifiers and raw domain strings are supported. Blocking an ecosystem identifier removes all of its domains. Blocking a raw domain uses exact-string matching — blocking `"github.com"` does **not** also remove `"*.github.com"`. ```yaml network: + allowed: + - python + - node blocked: - - "github.com" - - "*.github.com" + - python # Remove all Python ecosystem domains + - "github.com" # Remove exact domain + - "*.github.com" # Remove wildcard variant too ``` ### Permissions (ADO Access Tokens) diff --git a/README.md b/README.md index eb7ded7..68b06d7 100644 --- a/README.md +++ b/README.md @@ -337,7 +337,7 @@ mcp-servers: ``` Custom MCP containers run inside the AWF network sandbox. Add any required -external domains to `network.allow`. +external domains to `network.allowed`. --- @@ -410,12 +410,12 @@ reachable. The allowlist is built from: 1. **Core domains** — Azure DevOps, GitHub, Microsoft auth, Azure storage 2. **MCP domains** — automatically added per enabled MCP -3. **User domains** — from `network.allow` in front matter +3. **User domains** — from `network.allowed` in front matter 4. 
**Minus blocked** — `network.blocked` entries are removed by exact match (wildcard patterns like `*.example.com` are not affected by blocking a specific subdomain) ```yaml network: - allow: + allowed: - "*.mycompany.com" - "api.external-service.com" blocked: diff --git a/prompts/create-ado-agentic-workflow.md b/prompts/create-ado-agentic-workflow.md index ed4234b..efd370b 100644 --- a/prompts/create-ado-agentic-workflow.md +++ b/prompts/create-ado-agentic-workflow.md @@ -328,7 +328,7 @@ teardown: # Separate job AFTER ProcessSafeOutputs Additional allowed domains beyond the built-in allowlist: ```yaml network: - allow: + allowed: - "*.mycompany.com" - "api.external-service.com" blocked: diff --git a/prompts/debug-ado-agentic-workflow.md b/prompts/debug-ado-agentic-workflow.md index 183e09f..c2d6863 100644 --- a/prompts/debug-ado-agentic-workflow.md +++ b/prompts/debug-ado-agentic-workflow.md @@ -67,20 +67,20 @@ This is the most complex stage — it involves downloading binaries, starting Do | Error Pattern | Likely Cause | Fix | |---------------|-------------|-----| -| `503 Service Unavailable` from Squid | Domain not in allowlist | Add domain to `network.allow` in front matter | +| `503 Service Unavailable` from Squid | Domain not in allowlist | Add domain to `network.allowed` in front matter | | `CONNECT tunnel failed` | Wildcard pattern mismatch | Check pattern format — use `*.example.com` not `example.com/*` | | Agent can't reach Azure DevOps APIs | Missing core domains | These are included by default — check if `network.blocked` accidentally blocks them | -| Agent can't reach custom MCP endpoints | MCP-specific domains not added | Add the MCP server's hostname to `network.allow` | +| Agent can't reach custom MCP endpoints | MCP-specific domains not added | Add the MCP server's hostname to `network.allowed` | **Checking the allowlist**: The compiler merges three domain sources: 1. Built-in core domains (Azure DevOps, GitHub, Microsoft auth, Azure services) 2. 
MCP-specific domains (auto-added per enabled MCP) -3. User-specified domains from `network.allow` +3. User-specified domains from `network.allowed` If the agent needs to reach `api.myservice.com`, add it: ```yaml network: - allow: + allowed: - "api.myservice.com" - "*.myservice.com" # if subdomains are also needed ``` @@ -362,7 +362,7 @@ Use this checklist to systematically rule out common issues: - [ ] **Compilation in sync**: `ado-aw check ` passes - [ ] **Correct stage identified**: Know which of the 3 jobs failed -- [ ] **Network allowlist**: All required domains are in `network.allow` or built-in +- [ ] **Network allowlist**: All required domains are in `network.allowed` or built-in - [ ] **MCP tools allowed**: Every tool the agent needs is in an `allowed:` list - [ ] **Permissions set**: `permissions.write` is present if write safe-outputs are configured - [ ] **Service connections authorized**: ARM connections are permitted for this pipeline diff --git a/prompts/update-ado-agentic-workflow.md b/prompts/update-ado-agentic-workflow.md index bc08bca..52eb483 100644 --- a/prompts/update-ado-agentic-workflow.md +++ b/prompts/update-ado-agentic-workflow.md @@ -175,7 +175,7 @@ mcp-servers: - get_status ``` -Custom MCPs **must** have an explicit `allowed:` list. Add any required external domains to `network.allow`. +Custom MCPs **must** have an explicit `allowed:` list. Add any required external domains to `network.allowed`. ### Adding Permissions @@ -248,7 +248,7 @@ Before finalizing any update, verify: 5. **Workspace consistency**: If `workspace: repo` is set, ensure `checkout:` has additional repositories. If only `self` is checked out, `workspace: repo` is unnecessary (the compiler warns about this). -6. **Network domains**: If new MCPs or external services are added, ensure required domains are in `network.allow`. +6. **Network domains**: If new MCPs or external services are added, ensure required domains are in `network.allowed`. 7. 
**Target compatibility**: If `target: 1es`, custom MCPs (with `container:`) are not supported — only built-in MCPs with service connections. diff --git a/src/allowed_hosts.rs b/src/allowed_hosts.rs index 84b886f..7e8f5c6 100644 --- a/src/allowed_hosts.rs +++ b/src/allowed_hosts.rs @@ -142,9 +142,10 @@ mod tests { #[test] fn test_lean_hosts() { - use crate::runtimes::lean::LEAN_REQUIRED_HOSTS; - assert!(LEAN_REQUIRED_HOSTS.contains(&"elan.lean-lang.org")); - assert!(LEAN_REQUIRED_HOSTS.contains(&"leanprover.github.io")); - assert!(LEAN_REQUIRED_HOSTS.contains(&"lean-lang.org")); + use crate::ecosystem_domains::get_ecosystem_domains; + let lean_hosts = get_ecosystem_domains("lean"); + assert!(lean_hosts.contains(&"elan.lean-lang.org".to_string())); + assert!(lean_hosts.contains(&"leanprover.github.io".to_string())); + assert!(lean_hosts.contains(&"lean-lang.org".to_string())); } } diff --git a/src/compile/extensions.rs b/src/compile/extensions.rs index 3da06f6..33c8702 100644 --- a/src/compile/extensions.rs +++ b/src/compile/extensions.rs @@ -332,7 +332,7 @@ extension_enum! { // ─── Lean 4 ────────────────────────────────────────────────────────── use crate::runtimes::lean::{ - self, LeanRuntimeConfig, LEAN_BASH_COMMANDS, LEAN_REQUIRED_HOSTS, + self, LeanRuntimeConfig, LEAN_BASH_COMMANDS, }; /// Lean 4 runtime extension. 
@@ -359,7 +359,7 @@ impl CompilerExtension for LeanExtension { } fn required_hosts(&self) -> Vec { - LEAN_REQUIRED_HOSTS.iter().map(|h| (*h).to_string()).collect() + vec!["lean".to_string()] } fn required_bash_commands(&self) -> Vec { @@ -857,9 +857,9 @@ mod tests { fn test_lean_required_hosts() { let ext = LeanExtension::new(LeanRuntimeConfig::Enabled(true)); let hosts = ext.required_hosts(); - assert!(hosts.contains(&"elan.lean-lang.org".to_string())); - assert!(hosts.contains(&"leanprover.github.io".to_string())); - assert!(hosts.contains(&"lean-lang.org".to_string())); + // Lean extension returns the ecosystem identifier; domain expansion + // happens in generate_allowed_domains(). + assert_eq!(hosts, vec!["lean".to_string()]); } #[test] diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 3e9e475..cafe7e5 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -29,6 +29,7 @@ use super::common::{ use super::extensions::{CompilerExtension, McpgServerConfig, McpgGatewayConfig, McpgConfig}; use super::types::{FrontMatter, McpConfig}; use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; +use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; use std::collections::HashSet; /// Standalone pipeline compiler. @@ -261,7 +262,7 @@ impl Compiler for StandaloneCompiler { /// `--allow-domains` flag. The list includes: /// 1. Core Azure DevOps/GitHub endpoints /// 2. MCP-specific endpoints for each enabled MCP -/// 3. User-specified additional hosts from network.allow +/// 3. 
User-specified hosts from network.allowed (raw domains or ecosystem identifiers) fn generate_allowed_domains( front_matter: &FrontMatter, extensions: &[super::extensions::Extension], @@ -283,7 +284,7 @@ fn generate_allowed_domains( let user_hosts: Vec = front_matter .network .as_ref() - .map(|n| n.allow.clone()) + .map(|n| n.allowed.clone()) .unwrap_or_default(); // Generate the allowlist by combining core + MCP + extension + user hosts @@ -306,44 +307,84 @@ fn generate_allowed_domains( } } - // Add extension-declared hosts (runtimes + first-party tools) + // Add extension-declared hosts (runtimes + first-party tools). + // Extensions may return ecosystem identifiers (e.g., "lean") which are + // expanded to their domain lists, or raw domain names. for ext in extensions { for host in ext.required_hosts() { - hosts.insert(host); + if is_ecosystem_identifier(&host) { + let domains = get_ecosystem_domains(&host); + if domains.is_empty() { + eprintln!( + "warning: extension '{}' requires unknown ecosystem '{}'; \ + no domains added", + ext.name(), + host + ); + } + for domain in domains { + hosts.insert(domain); + } + } else { + hosts.insert(host); + } } } // Add user-specified hosts (validated against DNS-safe characters) + // Entries may be ecosystem identifiers (e.g., "python", "rust") which + // expand to their domain lists, or raw domain names. for host in &user_hosts { - let valid_chars = !host.is_empty() - && host - .chars() - .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); - if !valid_chars { - anyhow::bail!( - "network.allow domain '{}' contains characters invalid in DNS names. \ - Only ASCII alphanumerics, '.', '-', and '*' are allowed.", - host - ); - } - if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { - anyhow::bail!( - "network.allow domain '{}' uses '*' in an unsupported position. \ - Wildcards must appear only as a leading prefix (e.g. 
'*.example.com').", - host - ); + if is_ecosystem_identifier(host) { + let domains = get_ecosystem_domains(host); + if domains.is_empty() && !is_known_ecosystem(host) { + eprintln!( + "warning: network.allowed contains unknown ecosystem identifier '{}'. \ + Known ecosystems: python, rust, node, go, java, etc. \ + If this is a domain name, it should contain a dot.", + host + ); + } + for domain in domains { + hosts.insert(domain); + } + } else { + let valid_chars = !host.is_empty() + && host + .chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); + if !valid_chars { + anyhow::bail!( + "network.allowed domain '{}' contains characters invalid in DNS names. \ + Only ASCII alphanumerics, '.', '-', and '*' are allowed.", + host + ); + } + if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { + anyhow::bail!( + "network.allowed domain '{}' uses '*' in an unsupported position. \ + Wildcards must appear only as a leading prefix (e.g. '*.example.com').", + host + ); + } + hosts.insert(host.clone()); } - hosts.insert(host.clone()); } - // Remove blocked hosts + // Remove blocked hosts (supports both ecosystem identifiers and raw domains) let blocked_hosts: Vec = front_matter .network .as_ref() .map(|n| n.blocked.clone()) .unwrap_or_default(); for blocked in &blocked_hosts { - hosts.remove(blocked); + if is_ecosystem_identifier(blocked) { + for domain in get_ecosystem_domains(blocked) { + hosts.remove(&domain); + } + } else { + hosts.remove(blocked); + } } // Sort for deterministic output @@ -1782,7 +1823,7 @@ mod tests { fn test_generate_allowed_domains_blocked_takes_precedence_over_allow() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["evil.example.com".to_string()], + allowed: vec!["evil.example.com".to_string()], blocked: vec!["evil.example.com".to_string()], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1808,7 +1849,7 @@ mod tests { 
fn test_generate_allowed_domains_user_allow_host_included() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["api.mycompany.com".to_string()], + allowed: vec!["api.mycompany.com".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1826,7 +1867,7 @@ mod tests { // also remove wildcard variants like "*.github.com". This is intentional. let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec![], + allowed: vec![], blocked: vec!["github.com".to_string()], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1842,7 +1883,7 @@ mod tests { fn test_generate_allowed_domains_invalid_host_returns_error() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["bad host!".to_string()], + allowed: vec!["bad host!".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1874,6 +1915,74 @@ mod tests { assert!(!domains.contains("elan.lean-lang.org"), "lean disabled should not add lean hosts"); } + // ─── ecosystem identifier tests ────────────────────────────────────────── + + #[test] + fn test_generate_allowed_domains_ecosystem_python_expands() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allowed: vec!["python".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("pypi.org"), "python ecosystem should include pypi.org"); + assert!(domains.contains("pip.pypa.io"), "python ecosystem should include pip.pypa.io"); + } + + #[test] + fn test_generate_allowed_domains_ecosystem_rust_expands() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allowed: 
vec!["rust".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("crates.io"), "rust ecosystem should include crates.io"); + assert!(domains.contains("static.rust-lang.org"), "rust ecosystem should include static.rust-lang.org"); + } + + #[test] + fn test_generate_allowed_domains_ecosystem_mixed_with_raw_domains() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allowed: vec!["python".to_string(), "api.custom.com".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("pypi.org"), "ecosystem domains should be present"); + assert!(domains.contains("api.custom.com"), "raw domains should be present"); + } + + #[test] + fn test_generate_allowed_domains_ecosystem_blocked_removes_all_ecosystem_domains() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allowed: vec!["python".to_string()], + blocked: vec!["python".to_string()], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(!domains.contains("pypi.org"), "blocked ecosystem should remove its domains"); + assert!(!domains.contains("pip.pypa.io"), "blocked ecosystem should remove all its domains"); + } + + #[test] + fn test_generate_allowed_domains_multiple_ecosystems() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allowed: vec!["python".to_string(), "node".to_string(), "rust".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("pypi.org"), "python domains 
present"); + assert!(domains.contains("registry.npmjs.org"), "node domains present"); + assert!(domains.contains("crates.io"), "rust domains present"); + } + // ─── generate_prepare_steps ────────────────────────────────────────────── #[test] diff --git a/src/compile/types.rs b/src/compile/types.rs index 38e4c69..86a7aa0 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -594,14 +594,15 @@ fn default_model() -> String { /// The domain allowlist is dynamically generated based on: /// - Core Azure DevOps/GitHub endpoints (always included) /// - MCP-specific endpoints for each enabled MCP -/// - User-specified additional hosts from `allow` field +/// - User-specified additional hosts from `allowed` field #[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] +#[serde(deny_unknown_fields)] pub struct NetworkConfig { /// Additional allowed host patterns (supports wildcards like *.example.com) /// Core Azure DevOps and GitHub hosts are always allowed. #[serde(default)] - pub allow: Vec, - /// Blocked host patterns (takes precedence over allow) + pub allowed: Vec, + /// Blocked host patterns (takes precedence over allowed) #[serde(default)] pub blocked: Vec, } @@ -1228,4 +1229,61 @@ Body let runtimes = fm.runtimes.as_ref().unwrap(); assert!(runtimes.lean.as_ref().unwrap().is_enabled()); } + + // ─── NetworkConfig deny_unknown_fields ────────────────────────────────── + + #[test] + fn test_network_config_rejects_old_allow_field() { + let content = r#"--- +name: "Test" +description: "Test" +network: + allow: + - "*.mycompany.com" +--- + +Body +"#; + let result = super::super::common::parse_markdown(content); + assert!(result.is_err(), "network.allow (old field name) should be rejected"); + let err = format!("{:#}", result.unwrap_err()); + assert!( + err.contains("unknown field `allow`"), + "error should mention unknown field `allow`, got: {}", + err + ); + } + + #[test] + fn test_network_config_accepts_allowed_field() { + let content = r#"--- +name: 
"Test" +description: "Test" +network: + allowed: + - "*.mycompany.com" +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + let net = fm.network.unwrap(); + assert_eq!(net.allowed, vec!["*.mycompany.com"]); + assert!(net.blocked.is_empty()); + } + + #[test] + fn test_network_config_rejects_arbitrary_unknown_field() { + let content = r#"--- +name: "Test" +description: "Test" +network: + typo-field: true +--- + +Body +"#; + let result = super::super::common::parse_markdown(content); + assert!(result.is_err(), "unknown fields in network should be rejected"); + } } diff --git a/src/data/ecosystem_domains.json b/src/data/ecosystem_domains.json new file mode 100644 index 0000000..f7fb171 --- /dev/null +++ b/src/data/ecosystem_domains.json @@ -0,0 +1,252 @@ +{ + "bazel": ["releases.bazel.build", "mirror.bazel.build", "bcr.bazel.build", "blog.bazel.build"], + "chrome": ["*.google.com", "*.googleapis.com", "*.gvt1.com"], + "clojure": ["repo.clojars.org", "clojars.org"], + "containers": ["ghcr.io", "registry.hub.docker.com", "*.docker.io", "*.docker.com", "production.cloudflare.docker.com", "dl.k8s.io", "pkgs.k8s.io", "quay.io", "mcr.microsoft.com", "gcr.io", "auth.docker.io"], + "dart": ["pub.dev", "pub.dartlang.org", "storage.googleapis.com"], + "defaults": [ + "crl3.digicert.com", + "crl4.digicert.com", + "ocsp.digicert.com", + "ts-crl.ws.symantec.com", + "ts-ocsp.ws.symantec.com", + "crl.geotrust.com", + "ocsp.geotrust.com", + "crl.thawte.com", + "ocsp.thawte.com", + "crl.verisign.com", + "ocsp.verisign.com", + "crl.globalsign.com", + "ocsp.globalsign.com", + "crls.ssl.com", + "ocsp.ssl.com", + "crl.identrust.com", + "ocsp.identrust.com", + "crl.sectigo.com", + "ocsp.sectigo.com", + "crl.usertrust.com", + "ocsp.usertrust.com", + "s.symcb.com", + "s.symcd.com", + "json-schema.org", + "json.schemastore.org", + "archive.ubuntu.com", + "security.ubuntu.com", + "ppa.launchpad.net", + "keyserver.ubuntu.com", + "azure.archive.ubuntu.com", 
+ "api.snapcraft.io", + "packagecloud.io", + "packages.cloud.google.com", + "packages.microsoft.com", + "www.googleapis.com" + ], + "deno": ["deno.land", "jsr.io", "googleapis.deno.dev", "fresh.deno.dev"], + "dev-tools": [ + "app.renovatebot.com", + "appveyor.com", + "badgen.net", + "circleci.com", + "codacy.com", + "codeclimate.com", + "codecov.io", + "coveralls.io", + "deepsource.io", + "drone.io", + "img.shields.io", + "readthedocs.io", + "readthedocs.org", + "renovatebot.com", + "semaphoreci.com", + "shields.io", + "snyk.io", + "sonarcloud.io", + "sonarqube.com", + "travis-ci.com" + ], + "dotnet": [ + "nuget.org", + "dist.nuget.org", + "api.nuget.org", + "nuget.pkg.github.com", + "dotnet.microsoft.com", + "pkgs.dev.azure.com", + "builds.dotnet.microsoft.com", + "dotnetcli.blob.core.windows.net", + "nugetregistryv2prod.blob.core.windows.net", + "azuresearch-usnc.nuget.org", + "azuresearch-ussc.nuget.org", + "dc.services.visualstudio.com", + "dot.net", + "ci.dot.net", + "www.microsoft.com", + "oneocsp.microsoft.com", + "*.vsblob.vsassets.io" + ], + "elixir": ["hex.pm", "repo.hex.pm", "builds.hex.pm", "cdn.hex.pm", "fastly.hex.pm"], + "fonts": ["fonts.googleapis.com", "fonts.gstatic.com"], + "github": [ + "*.githubusercontent.com", + "codeload.github.com", + "docs.github.com", + "github-cloud.githubusercontent.com", + "github-cloud.s3.amazonaws.com", + "github.blog", + "github.com", + "github.githubassets.com", + "lfs.github.com", + "objects.githubusercontent.com", + "raw.githubusercontent.com" + ], + "github-actions": [ + "productionresultssa0.blob.core.windows.net", + "productionresultssa1.blob.core.windows.net", + "productionresultssa2.blob.core.windows.net", + "productionresultssa3.blob.core.windows.net", + "productionresultssa4.blob.core.windows.net", + "productionresultssa5.blob.core.windows.net", + "productionresultssa6.blob.core.windows.net", + "productionresultssa7.blob.core.windows.net", + "productionresultssa8.blob.core.windows.net", + 
"productionresultssa9.blob.core.windows.net", + "productionresultssa10.blob.core.windows.net", + "productionresultssa11.blob.core.windows.net", + "productionresultssa12.blob.core.windows.net", + "productionresultssa13.blob.core.windows.net", + "productionresultssa14.blob.core.windows.net", + "productionresultssa15.blob.core.windows.net", + "productionresultssa16.blob.core.windows.net", + "productionresultssa17.blob.core.windows.net", + "productionresultssa18.blob.core.windows.net", + "productionresultssa19.blob.core.windows.net" + ], + "go": ["go.dev", "golang.org", "proxy.golang.org", "sum.golang.org", "pkg.go.dev", "goproxy.io", "storage.googleapis.com"], + "haskell": ["haskell.org", "*.hackage.haskell.org", "get-ghcup.haskell.org", "downloads.haskell.org"], + "java": [ + "www.java.com", + "jdk.java.net", + "api.adoptium.net", + "adoptium.net", + "repo.maven.apache.org", + "maven.apache.org", + "repo1.maven.org", + "maven.pkg.github.com", + "maven.oracle.com", + "repo.spring.io", + "gradle.org", + "services.gradle.org", + "plugins.gradle.org", + "plugins-artifacts.gradle.org", + "repo.grails.org", + "download.eclipse.org", + "download.oracle.com", + "jcenter.bintray.com", + "dlcdn.apache.org", + "archive.apache.org", + "download.java.net", + "api.foojay.io", + "cdn.azul.com", + "central.sonatype.com", + "maven.google.com", + "dl.google.com", + "repo.gradle.org", + "downloads.gradle-dn.com", + "develocity.apache.org", + "scans-in.gradle.com", + "ge.spockframework.org", + "*.gradle-enterprise.cloud", + "maven-central.storage-download.googleapis.com", + "repository.apache.org" + ], + "julia": ["pkg.julialang.org", "*.pkg.julialang.org", "julialang.org", "julialang-s3.julialang.org", "storage.julialang.net"], + "kotlin": ["download.jetbrains.com", "ge.jetbrains.com", "packages.jetbrains.team", "kotlin.bintray.com", "maven.pkg.jetbrains.space"], + "lean": ["elan.lean-lang.org", "leanprover.github.io", "lean-lang.org", "reservoir.lean-lang.org", 
"static.lean-lang.org"], + "linux-distros": [ + "deb.debian.org", + "security.debian.org", + "keyring.debian.org", + "packages.debian.org", + "debian.map.fastlydns.net", + "apt.llvm.org", + "dl.fedoraproject.org", + "mirrors.fedoraproject.org", + "download.fedoraproject.org", + "mirror.centos.org", + "vault.centos.org", + "dl-cdn.alpinelinux.org", + "pkg.alpinelinux.org", + "mirror.archlinux.org", + "archlinux.org", + "download.opensuse.org", + "cdn.redhat.com" + ], + "local": ["127.0.0.1", "::1", "localhost"], + "lua": ["luarocks.org", "www.luarocks.org"], + "node": [ + "npmjs.org", + "npmjs.com", + "www.npmjs.com", + "www.npmjs.org", + "registry.npmjs.com", + "registry.npmjs.org", + "skimdb.npmjs.com", + "npm.pkg.github.com", + "api.npms.io", + "nodejs.org", + "yarnpkg.com", + "registry.yarnpkg.com", + "repo.yarnpkg.com", + "deb.nodesource.com", + "get.pnpm.io", + "bun.sh", + "deno.land", + "jsr.io", + "registry.bower.io", + "esm.sh", + "googleapis.deno.dev", + "googlechromelabs.github.io", + "storage.googleapis.com", + "cdn.jsdelivr.net", + "telemetry.vercel.com" + ], + "node-cdns": ["cdn.jsdelivr.net", "data.jsdelivr.com", "code.jquery.com", "cdn.sheetjs.com"], + "ocaml": ["opam.ocaml.org", "ocaml.org", "erratique.ch"], + "perl": ["cpan.org", "www.cpan.org", "metacpan.org", "cpan.metacpan.org"], + "php": ["repo.packagist.org", "packagist.org", "getcomposer.org", "bitbucket.org"], + "playwright": ["playwright.download.prss.microsoft.com", "cdn.playwright.dev"], + "powershell": ["powershellgallery.com", "www.powershellgallery.com"], + "python": [ + "pypi.python.org", + "pypi.org", + "pip.pypa.io", + "*.pythonhosted.org", + "files.pythonhosted.org", + "bootstrap.pypa.io", + "conda.binstar.org", + "conda.anaconda.org", + "binstar.org", + "anaconda.org", + "repo.continuum.io", + "repo.anaconda.com", + "crates.io", + "index.crates.io", + "static.crates.io" + ], + "r": ["cloud.r-project.org", "cran.r-project.org", "cran.rstudio.com", "r-project.org"], + "ruby": 
["rubygems.org", "api.rubygems.org", "rubygems.pkg.github.com", "bundler.rubygems.org", "gems.rubyforge.org", "gems.rubyonrails.org", "index.rubygems.org", "cache.ruby-lang.org", "*.rvm.io"], + "rust": ["crates.io", "index.crates.io", "static.crates.io", "sh.rustup.rs", "static.rust-lang.org"], + "scala": ["repo.scala-sbt.org", "scala-ci.typesafe.com", "repo.typesafe.com", "jitpack.io", "dl.bintray.com", "scala.jfrog.io"], + "swift": ["download.swift.org", "swift.org", "cocoapods.org", "cdn.cocoapods.org"], + "terraform": ["releases.hashicorp.com", "apt.releases.hashicorp.com", "yum.releases.hashicorp.com", "registry.terraform.io"], + "threat-detection": [ + "api.business.githubcopilot.com", + "api.enterprise.githubcopilot.com", + "api.github.com", + "api.githubcopilot.com", + "api.individual.githubcopilot.com", + "github.com", + "host.docker.internal", + "telemetry.enterprise.githubcopilot.com" + ], + "zig": ["ziglang.org", "pkg.machengine.org", "deps.files.ghostty.org"] +} diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs new file mode 100644 index 0000000..77f6651 --- /dev/null +++ b/src/ecosystem_domains.rs @@ -0,0 +1,262 @@ +//! Ecosystem domain allowlists for network isolation. +//! +//! This module loads ecosystem-specific domain lists from an embedded JSON file +//! sourced from [gh-aw](https://github.com/github/gh-aw). The JSON maps ecosystem +//! identifiers (e.g., `"python"`, `"rust"`, `"node"`) to arrays of domains that +//! those ecosystems require for package management, registry access, etc. +//! +//! Users reference these identifiers in the `network.allowed` front matter field +//! instead of listing individual domains: +//! +//! ```yaml +//! network: +//! allowed: +//! - python +//! - rust +//! - "api.custom.com" +//! ``` + +use std::collections::{HashMap, HashSet}; +use std::sync::LazyLock; + +/// Embedded ecosystem domains JSON, sourced from gh-aw. 
+static ECOSYSTEM_JSON: &str = include_str!("data/ecosystem_domains.json"); + +/// Parsed ecosystem domain map, loaded once at first access. +static ECOSYSTEM_DOMAINS: LazyLock>> = LazyLock::new(|| { + serde_json::from_str(ECOSYSTEM_JSON).expect("embedded ecosystem_domains.json is invalid") +}); + +/// Compound ecosystems that expand to the union of multiple component ecosystems. +/// Mirrors gh-aw's `compoundEcosystems` mapping. +static COMPOUND_ECOSYSTEMS: LazyLock>> = + LazyLock::new(|| { + HashMap::from([( + "default-safe-outputs", + vec!["defaults", "dev-tools", "github", "local"], + )]) + }); + +/// Returns the domains for a given ecosystem identifier. +/// +/// Supports both direct ecosystem names (e.g., `"python"`) and compound +/// identifiers (e.g., `"default-safe-outputs"` which expands to +/// `defaults + dev-tools + github + local`). +/// +/// Returns an empty `Vec` if the identifier is unknown. +pub fn get_ecosystem_domains(identifier: &str) -> Vec { + get_ecosystem_domains_inner(identifier, 0) +} + +/// Recursive inner function with a depth guard to prevent stack overflow +/// from circular compound ecosystem references. +fn get_ecosystem_domains_inner(identifier: &str, depth: u8) -> Vec { + if depth > 8 { + eprintln!( + "warning: ecosystem expansion exceeded max depth for '{}'; \ + possible cycle in compound ecosystems", + identifier + ); + return vec![]; + } + + // Check compound ecosystems first + if let Some(components) = COMPOUND_ECOSYSTEMS.get(identifier) { + let mut domains: HashSet = HashSet::new(); + for component in components { + for d in get_ecosystem_domains_inner(component, depth + 1) { + domains.insert(d); + } + } + let mut result: Vec = domains.into_iter().collect(); + result.sort(); + return result; + } + + ECOSYSTEM_DOMAINS + .get(identifier) + .cloned() + .unwrap_or_default() +} + +/// Returns `true` if the identifier is a known ecosystem name +/// (either a direct key in the JSON or a compound identifier). 
+pub fn is_known_ecosystem(identifier: &str) -> bool { + ECOSYSTEM_DOMAINS.contains_key(identifier) || COMPOUND_ECOSYSTEMS.contains_key(identifier) +} + +/// Returns the sorted list of all known ecosystem names +/// (both direct and compound). +#[cfg(test)] +pub fn known_ecosystem_names() -> Vec { + let mut names: Vec = ECOSYSTEM_DOMAINS + .keys() + .cloned() + .chain(COMPOUND_ECOSYSTEMS.keys().map(|k| k.to_string())) + .collect(); + names.sort(); + names.dedup(); + names +} + +/// Heuristic: ecosystem identifiers are composed of lowercase ASCII letters, +/// digits, and hyphens (e.g., `"python"`, `"linux-distros"`, `"default-safe-outputs"`). +/// Domain names contain dots (e.g., `"pypi.org"`, `"*.example.com"`). +/// Strings with spaces, special characters, or other unexpected content are +/// treated as neither — they fall through to domain validation which will reject them. +pub fn is_ecosystem_identifier(value: &str) -> bool { + !value.is_empty() + && !value.contains('.') + && value + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_known_ecosystems_loaded() { + let names = known_ecosystem_names(); + assert!(names.contains(&"python".to_string())); + assert!(names.contains(&"rust".to_string())); + assert!(names.contains(&"node".to_string())); + assert!(names.contains(&"go".to_string())); + assert!(names.contains(&"defaults".to_string())); + assert!(names.len() > 20, "expected 20+ ecosystems, got {}", names.len()); + } + + #[test] + fn test_get_python_domains() { + let domains = get_ecosystem_domains("python"); + assert!(domains.contains(&"pypi.org".to_string())); + assert!(domains.contains(&"pip.pypa.io".to_string())); + assert!(!domains.is_empty()); + } + + #[test] + fn test_get_rust_domains() { + let domains = get_ecosystem_domains("rust"); + assert!(domains.contains(&"crates.io".to_string())); + assert!(domains.contains(&"static.rust-lang.org".to_string())); + } 
+ + #[test] + fn test_get_node_domains() { + let domains = get_ecosystem_domains("node"); + assert!(domains.contains(&"registry.npmjs.org".to_string())); + assert!(domains.contains(&"nodejs.org".to_string())); + } + + #[test] + fn test_unknown_ecosystem_returns_empty() { + let domains = get_ecosystem_domains("nonexistent-ecosystem"); + assert!(domains.is_empty()); + } + + #[test] + fn test_is_known_ecosystem() { + assert!(is_known_ecosystem("python")); + assert!(is_known_ecosystem("rust")); + assert!(is_known_ecosystem("default-safe-outputs")); + assert!(!is_known_ecosystem("nonexistent")); + } + + #[test] + fn test_compound_ecosystem() { + let domains = get_ecosystem_domains("default-safe-outputs"); + assert!(!domains.is_empty()); + // Should include domains from defaults, dev-tools, github, local + assert!(domains.contains(&"github.com".to_string()), "should include github domains"); + assert!(domains.contains(&"localhost".to_string()), "should include local domains"); + } + + #[test] + fn test_is_ecosystem_identifier_heuristic() { + // Ecosystem identifiers (lowercase + hyphens) + assert!(is_ecosystem_identifier("python")); + assert!(is_ecosystem_identifier("rust")); + assert!(is_ecosystem_identifier("node")); + assert!(is_ecosystem_identifier("default-safe-outputs")); + assert!(is_ecosystem_identifier("linux-distros")); + + // Domain names (have dots) + assert!(!is_ecosystem_identifier("pypi.org")); + assert!(!is_ecosystem_identifier("*.example.com")); + assert!(!is_ecosystem_identifier("api.github.com")); + + // Invalid strings (special chars, spaces, uppercase) + assert!(!is_ecosystem_identifier("")); + assert!(!is_ecosystem_identifier("bad host!")); + assert!(!is_ecosystem_identifier("PYTHON")); + assert!(!is_ecosystem_identifier("hello world")); + } + + #[test] + fn test_defaults_ecosystem_has_expected_entries() { + let domains = get_ecosystem_domains("defaults"); + // Certificate infrastructure + assert!(domains.contains(&"ocsp.digicert.com".to_string())); 
+ // Ubuntu + assert!(domains.contains(&"archive.ubuntu.com".to_string())); + } + + #[test] + fn test_embedded_json_parses_as_expected_schema() { + // Validates that the compile-time-embedded ecosystem_domains.json + // deserializes into HashMap> without panicking. + let parsed: Result>, _> = + serde_json::from_str(ECOSYSTEM_JSON); + assert!( + parsed.is_ok(), + "embedded ecosystem_domains.json failed to parse: {}", + parsed.unwrap_err() + ); + let map = parsed.unwrap(); + assert!(!map.is_empty(), "ecosystem_domains.json should not be empty"); + // Every ecosystem should have a non-empty domain list + for (key, domains) in &map { + assert!( + !domains.is_empty(), + "ecosystem '{}' has an empty domain list", + key + ); + } + } + + #[test] + fn test_malformed_json_rejected() { + // Ensures serde_json correctly rejects JSON that doesn't match + // the expected HashMap> schema, validating + // the safety of the .expect() guard on the LazyLock. + let bad_schema = r#"{"python": "not-a-list"}"#; + let result: Result>, _> = + serde_json::from_str(bad_schema); + assert!(result.is_err(), "schema mismatch should produce an error"); + + let bad_json = r#"{"python": [123, true]}"#; + let result: Result>, _> = + serde_json::from_str(bad_json); + assert!(result.is_err(), "non-string array elements should produce an error"); + + let invalid_json = r#"{not valid json"#; + let result: Result>, _> = + serde_json::from_str(invalid_json); + assert!(result.is_err(), "invalid JSON syntax should produce an error"); + } + + #[test] + fn test_depth_guard_prevents_deep_recursion() { + // get_ecosystem_domains_inner with depth > 8 returns empty + let result = get_ecosystem_domains_inner("python", 9); + assert!(result.is_empty(), "depth > 8 should short-circuit to empty"); + } + + #[test] + fn test_depth_guard_allows_normal_depth() { + // Normal calls (depth 0) should work fine + let result = get_ecosystem_domains_inner("python", 0); + assert!(!result.is_empty(), "depth 0 should return normal 
results"); + } +} diff --git a/src/main.rs b/src/main.rs index 7a213f6..a03518b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod allowed_hosts; mod compile; mod configure; mod detect; +mod ecosystem_domains; mod execute; mod fuzzy_schedule; mod init; diff --git a/src/runtimes/lean.rs b/src/runtimes/lean.rs index e90fe53..8a01a6b 100644 --- a/src/runtimes/lean.rs +++ b/src/runtimes/lean.rs @@ -75,14 +75,6 @@ pub struct LeanOptions { /// Bash commands that the Lean runtime adds to the allow-list. pub const LEAN_BASH_COMMANDS: &[&str] = &["lean", "lake", "elan"]; -/// Network domains required by the Lean runtime (elan installer + toolchain downloads). -/// github.com and *.githubusercontent.com are already in CORE_ALLOWED_HOSTS. -pub const LEAN_REQUIRED_HOSTS: &[&str] = &[ - "elan.lean-lang.org", - "leanprover.github.io", - "lean-lang.org", -]; - /// Generate the elan installation step for Lean 4. /// /// Installs elan (Lean toolchain manager) and the specified toolchain. diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index b19df6a..db499af 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2523,7 +2523,7 @@ tools: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a valid leading wildcard (*.example.com) compiles successfully +/// Test that network.allowed with a valid leading wildcard (*.example.com) compiles successfully #[test] fn test_network_allow_valid_wildcard_compiles() { let temp_dir = std::env::temp_dir().join(format!( @@ -2534,9 +2534,9 @@ fn test_network_allow_valid_wildcard_compiles() { let input = r#"--- name: "Network Wildcard Agent" -description: "Agent with valid leading wildcard in network.allow" +description: "Agent with valid leading wildcard in network.allowed" network: - allow: + allowed: - "*.mycompany.com" - "api.external-service.com" --- @@ -2563,7 +2563,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a trailing wildcard 
(example.*) fails compilation +/// Test that network.allowed with a trailing wildcard (example.*) fails compilation #[test] fn test_network_allow_trailing_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2574,9 +2574,9 @@ fn test_network_allow_trailing_wildcard_fails() { let input = r#"--- name: "Network Trailing Wildcard Agent" -description: "Agent with trailing wildcard in network.allow" +description: "Agent with trailing wildcard in network.allowed" network: - allow: + allowed: - "example.*" --- @@ -2607,7 +2607,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a mid-string wildcard (ex*ample.com) fails compilation +/// Test that network.allowed with a mid-string wildcard (ex*ample.com) fails compilation #[test] fn test_network_allow_mid_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2618,9 +2618,9 @@ fn test_network_allow_mid_wildcard_fails() { let input = r#"--- name: "Network Mid Wildcard Agent" -description: "Agent with mid-string wildcard in network.allow" +description: "Agent with mid-string wildcard in network.allowed" network: - allow: + allowed: - "ex*ample.com" --- @@ -2651,7 +2651,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a double wildcard (*.*.com) fails compilation +/// Test that network.allowed with a double wildcard (*.*.com) fails compilation #[test] fn test_network_allow_double_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2662,9 +2662,9 @@ fn test_network_allow_double_wildcard_fails() { let input = r#"--- name: "Network Double Wildcard Agent" -description: "Agent with double wildcard in network.allow" +description: "Agent with double wildcard in network.allowed" network: - allow: + allowed: - "*.*.com" --- @@ -2695,7 +2695,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a bare '*' fails compilation +/// Test that network.allowed with a bare '*' 
fails compilation #[test] fn test_network_allow_bare_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2706,9 +2706,9 @@ fn test_network_allow_bare_wildcard_fails() { let input = r#"--- name: "Network Bare Wildcard Agent" -description: "Agent with bare wildcard in network.allow" +description: "Agent with bare wildcard in network.allowed" network: - allow: + allowed: - "*" --- diff --git a/tests/fixtures/azure-devops-mcp-agent.md b/tests/fixtures/azure-devops-mcp-agent.md index 43e4036..2dc9a4d 100644 --- a/tests/fixtures/azure-devops-mcp-agent.md +++ b/tests/fixtures/azure-devops-mcp-agent.md @@ -20,7 +20,7 @@ safe-outputs: create-work-item: work-item-type: Task network: - allow: + allowed: - "dev.azure.com" - "*.dev.azure.com" ---