From 6981875a0c4f5e06fbfb38cf48499089ff7dbfbc Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:15:17 +0100 Subject: [PATCH 01/12] feat: add ecosystem domain allowlists from gh-aw Add ecosystem identifier support for the network.allow front matter field. Users can now reference ecosystem names (e.g., python, rust, node) that expand to curated domain lists, matching gh-aw's approach. Changes: - Add src/data/ecosystem_domains.json sourced from gh-aw with 30+ ecosystem categories - Add src/ecosystem_domains.rs module with lookup, validation, and compound ecosystem support - Update generate_allowed_domains() to resolve ecosystem identifiers in both network.allow and network.blocked - Extend dependency updater workflow to sync ecosystem_domains.json from gh-aw upstream - Update AGENTS.md with ecosystem identifier documentation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/update-awf-version.md | 64 ++++-- AGENTS.md | 61 ++++-- src/compile/standalone.rs | 97 ++++++++- src/data/ecosystem_domains.json | 251 ++++++++++++++++++++++++ src/ecosystem_domains.rs | 189 ++++++++++++++++++ src/main.rs | 1 + 6 files changed, 639 insertions(+), 24 deletions(-) create mode 100644 src/data/ecosystem_domains.json create mode 100644 src/ecosystem_domains.rs diff --git a/.github/workflows/update-awf-version.md b/.github/workflows/update-awf-version.md index d7ba9a0..36f94da 100644 --- a/.github/workflows/update-awf-version.md +++ b/.github/workflows/update-awf-version.md @@ -1,7 +1,7 @@ --- on: schedule: daily -description: Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and opens PRs to update pinned version constants +description: Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and syncs ecosystem_domains.json from gh-aw. Opens PRs for any updates found. 
permissions: contents: read issues: read @@ -13,7 +13,7 @@ network: allowed: [defaults] safe-outputs: create-pull-request: - max: 3 + max: 4 --- # Dependency Version Updater @@ -22,21 +22,22 @@ You are a dependency maintenance bot for the **ado-aw** project — a Rust CLI c ## Your Task -Check whether pinned version constants in `src/compile/common.rs` are up to date with the latest releases of their upstream dependencies. For each outdated constant, open a PR to update it. +Check whether pinned version constants in `src/compile/common.rs` are up to date with the latest releases of their upstream dependencies, and whether `src/data/ecosystem_domains.json` matches the upstream source. For each outdated item, open a PR to update it. -There are three dependencies to check: +There are four items to check: -| Constant | Upstream Repository | Example value | -|----------|-------------------|---------------| -| `AWF_VERSION` | [github/gh-aw-firewall](https://github.com/github/gh-aw-firewall) | `0.25.14` | -| `COPILOT_CLI_VERSION` | [github/copilot-cli](https://github.com/github/copilot-cli) | `1.0.6` | -| `MCPG_VERSION` | [github/gh-aw-mcpg](https://github.com/github/gh-aw-mcpg) | `0.1.9` | +| Item | Upstream Source | Local Path | +|------|---------------|------------| +| `AWF_VERSION` | [github/gh-aw-firewall](https://github.com/github/gh-aw-firewall) latest release | `src/compile/common.rs` | +| `COPILOT_CLI_VERSION` | [github/copilot-cli](https://github.com/github/copilot-cli) latest release | `src/compile/common.rs` | +| `MCPG_VERSION` | [github/gh-aw-mcpg](https://github.com/github/gh-aw-mcpg) latest release | `src/compile/common.rs` | +| `ecosystem_domains.json` | [github/gh-aw](https://github.com/github/gh-aw) `pkg/workflow/data/ecosystem_domains.json` on `main` | `src/data/ecosystem_domains.json` | -Run the following steps **independently for each dependency**. One may be up to date while the other is not. +Run the following steps **independently for each item**. 
One may be up to date while another is not. --- -## For each dependency: +## For AWF_VERSION, COPILOT_CLI_VERSION, MCPG_VERSION: ### Step 1: Get the Latest Release @@ -115,3 +116,44 @@ If the latest version is newer than the current constant: ``` - **Base branch**: `main` + +--- + +## For ecosystem_domains.json: + +### Step 1: Fetch the Upstream File + +Read the file `pkg/workflow/data/ecosystem_domains.json` from the `main` branch of [github/gh-aw](https://github.com/github/gh-aw). + +### Step 2: Read the Local File + +Read `src/data/ecosystem_domains.json` in this repository. + +### Step 3: Compare Content + +Compare the upstream and local files. If they are identical, **skip** — the file is up to date. + +Before proceeding, also check whether a PR already exists with the title `chore: sync ecosystem_domains.json from gh-aw`. If one is already open, **skip** to avoid duplicates. + +### Step 4: Create a Sync PR + +If the files differ: + +1. Replace the contents of `src/data/ecosystem_domains.json` with the upstream version exactly as-is (preserve formatting). + +2. Create a pull request: + +- **Title**: `chore: sync ecosystem_domains.json from gh-aw` +- **Body**: + ```markdown + ## Ecosystem Domains Sync + + Updates `src/data/ecosystem_domains.json` to match the upstream source at [`github/gh-aw/pkg/workflow/data/ecosystem_domains.json`](https://github.com/github/gh-aw/blob/main/pkg/workflow/data/ecosystem_domains.json). + + This file defines the domain allowlists for ecosystem identifiers (e.g., `python`, `rust`, `node`) used in the `network.allow` front matter field. 
+ + --- + *This PR was opened automatically by the dependency version updater workflow.* + ``` + +- **Base branch**: `main` diff --git a/AGENTS.md b/AGENTS.md index cb051ba..6d6c77c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,6 +21,9 @@ Alongside the correctly generated pipeline yaml, an agent file is generated from ├── src/ │ ├── main.rs # Entry point with clap CLI │ ├── allowed_hosts.rs # Core network allowlist definitions +│ ├── ecosystem_domains.rs # Ecosystem domain lookups (python, rust, node, etc.) +│ ├── data/ +│ │ └── ecosystem_domains.json # Ecosystem domain lists (synced from gh-aw) │ ├── compile/ # Pipeline compilation module │ │ ├── mod.rs # Module entry point and Compiler trait │ │ ├── common.rs # Shared helpers across targets @@ -190,9 +193,10 @@ teardown: # separate job AFTER safe outputs processing - bash: echo "Teardown job step" displayName: "Teardown step" network: # optional network policy (standalone target only) - allow: # additional allowed host patterns - - "*.mycompany.com" - blocked: # blocked host patterns (removes exact entries from the allow list) + allow: # allowed host patterns and/or ecosystem identifiers + - python # ecosystem identifier — expands to Python/PyPI domains + - "*.mycompany.com" # raw domain pattern + blocked: # blocked host patterns or ecosystems (removes from allow list) - "evil.example.com" permissions: # optional ADO access token configuration read: my-read-arm-connection # ARM service connection for read-only ADO access (Stage 1 agent) @@ -784,7 +788,8 @@ If no passthrough env vars are needed, this marker is replaced with an empty str Should be replaced with the comma-separated domain list for AWF's `--allow-domains` flag. The list includes: 1. Core Azure DevOps/GitHub endpoints (from `allowed_hosts.rs`) 2. MCP-specific endpoints for each enabled MCP -3. User-specified additional hosts from `network.allow:` front matter +3. 
Ecosystem identifier expansions from `network.allow:` (e.g., `python` → PyPI/pip domains) +4. User-specified additional hosts from `network.allow:` front matter The output is formatted as a comma-separated string (e.g., `github.com,*.dev.azure.com,api.github.com`). @@ -1664,26 +1669,60 @@ The following domains are always allowed (defined in `allowed_hosts.rs`): ### Adding Additional Hosts -Agents can specify additional allowed hosts in their front matter: +Agents can specify additional allowed hosts in their front matter using either ecosystem identifiers or raw domain patterns: ```yaml network: allow: - - "*.mycompany.com" - - "api.external-service.com" + - python # Ecosystem identifier — expands to Python/PyPI domains + - rust # Ecosystem identifier — expands to Rust/crates.io domains + - "*.mycompany.com" # Raw domain pattern + - "api.external-service.com" # Raw domain ``` -All hosts (core + MCP-specific + user-specified) are combined into a comma-separated domain list passed to AWF's `--allow-domains` flag. +#### Ecosystem Identifiers + +Ecosystem identifiers are shorthand names that expand to curated domain lists for common language ecosystems and services. The domain lists are sourced from [gh-aw](https://github.com/github/gh-aw) and kept up to date via an automated workflow. + +Available ecosystem identifiers include: + +| Identifier | Includes | +|------------|----------| +| `defaults` | Certificate infrastructure, Ubuntu mirrors, common package registries | +| `github` | GitHub domains (`github.com`, `*.githubusercontent.com`, etc.) | +| `local` | Loopback addresses (`localhost`, `127.0.0.1`, `::1`) | +| `containers` | Docker Hub, GHCR, Quay, Kubernetes | +| `linux-distros` | Debian, Alpine, Fedora, CentOS, Arch Linux package repositories | +| `dev-tools` | CI/CD and developer tool services (Codecov, Shields.io, Snyk, etc.) 
| +| `python` | PyPI, pip, Conda, Anaconda | +| `rust` | crates.io, rustup, static.rust-lang.org | +| `node` | npm, Yarn, pnpm, Bun, Deno, Node.js | +| `go` | proxy.golang.org, pkg.go.dev, Go module proxy | +| `java` | Maven Central, Gradle, JDK downloads | +| `dotnet` | NuGet, .NET SDK | +| `ruby` | RubyGems, Bundler | +| `swift` | Swift.org, CocoaPods | +| `terraform` | HashiCorp releases, Terraform registry | + +Additional ecosystems: `bazel`, `chrome`, `clojure`, `dart`, `deno`, `elixir`, `fonts`, `github-actions`, `haskell`, `julia`, `kotlin`, `lua`, `node-cdns`, `ocaml`, `perl`, `php`, `playwright`, `powershell`, `r`, `scala`, `zig`. + +The full domain lists are defined in `src/data/ecosystem_domains.json`. + +All hosts (core + MCP-specific + ecosystem expansions + user-specified) are combined into a comma-separated domain list passed to AWF's `--allow-domains` flag. #### Blocking Hosts -The `network.blocked` field removes hosts from the combined allowlist using **exact-string matching**. Blocking `"github.com"` removes only that exact entry — it does **not** remove wildcard variants like `"*.github.com"`. To fully block a domain and its subdomains, list both the exact host and the wildcard pattern: +The `network.blocked` field removes hosts from the combined allowlist. Both ecosystem identifiers and raw domain strings are supported. Blocking an ecosystem identifier removes all of its domains. Blocking a raw domain uses exact-string matching — blocking `"github.com"` does **not** also remove `"*.github.com"`. 
```yaml network: + allow: + - python + - node blocked: - - "github.com" - - "*.github.com" + - python # Remove all Python ecosystem domains + - "github.com" # Remove exact domain + - "*.github.com" # Remove wildcard variant too ``` ### Permissions (ADO Access Tokens) diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 3e9e475..e5e786d 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -29,6 +29,7 @@ use super::common::{ use super::extensions::{CompilerExtension, McpgServerConfig, McpgGatewayConfig, McpgConfig}; use super::types::{FrontMatter, McpConfig}; use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; +use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; use std::collections::HashSet; /// Standalone pipeline compiler. @@ -314,7 +315,25 @@ fn generate_allowed_domains( } // Add user-specified hosts (validated against DNS-safe characters) + // Entries may be ecosystem identifiers (e.g., "python", "rust") which + // expand to their domain lists, or raw domain names. for host in &user_hosts { + if is_ecosystem_identifier(host) { + let domains = get_ecosystem_domains(host); + if domains.is_empty() && !is_known_ecosystem(host) { + eprintln!( + "warning: network.allow contains unknown ecosystem identifier '{}'. \ + Known ecosystems: python, rust, node, go, java, etc. 
\ + If this is a domain name, it should contain a dot.", + host + ); + } + for domain in domains { + hosts.insert(domain); + } + continue; + } + let valid_chars = !host.is_empty() && host .chars() @@ -336,14 +355,20 @@ fn generate_allowed_domains( hosts.insert(host.clone()); } - // Remove blocked hosts + // Remove blocked hosts (supports both ecosystem identifiers and raw domains) let blocked_hosts: Vec = front_matter .network .as_ref() .map(|n| n.blocked.clone()) .unwrap_or_default(); for blocked in &blocked_hosts { - hosts.remove(blocked); + if is_ecosystem_identifier(blocked) { + for domain in get_ecosystem_domains(blocked) { + hosts.remove(&domain); + } + } else { + hosts.remove(blocked); + } } // Sort for deterministic output @@ -1874,6 +1899,74 @@ mod tests { assert!(!domains.contains("elan.lean-lang.org"), "lean disabled should not add lean hosts"); } + // ─── ecosystem identifier tests ────────────────────────────────────────── + + #[test] + fn test_generate_allowed_domains_ecosystem_python_expands() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["python".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("pypi.org"), "python ecosystem should include pypi.org"); + assert!(domains.contains("pip.pypa.io"), "python ecosystem should include pip.pypa.io"); + } + + #[test] + fn test_generate_allowed_domains_ecosystem_rust_expands() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["rust".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("crates.io"), "rust ecosystem should include crates.io"); + assert!(domains.contains("static.rust-lang.org"), "rust 
ecosystem should include static.rust-lang.org"); + } + + #[test] + fn test_generate_allowed_domains_ecosystem_mixed_with_raw_domains() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["python".to_string(), "api.custom.com".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("pypi.org"), "ecosystem domains should be present"); + assert!(domains.contains("api.custom.com"), "raw domains should be present"); + } + + #[test] + fn test_generate_allowed_domains_ecosystem_blocked_removes_all_ecosystem_domains() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["python".to_string()], + blocked: vec!["python".to_string()], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(!domains.contains("pypi.org"), "blocked ecosystem should remove its domains"); + assert!(!domains.contains("pip.pypa.io"), "blocked ecosystem should remove all its domains"); + } + + #[test] + fn test_generate_allowed_domains_multiple_ecosystems() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["python".to_string(), "node".to_string(), "rust".to_string()], + blocked: vec![], + }); + let exts = super::super::extensions::collect_extensions(&fm); + let domains = generate_allowed_domains(&fm, &exts).unwrap(); + assert!(domains.contains("pypi.org"), "python domains present"); + assert!(domains.contains("registry.npmjs.org"), "node domains present"); + assert!(domains.contains("crates.io"), "rust domains present"); + } + // ─── generate_prepare_steps ────────────────────────────────────────────── #[test] diff --git a/src/data/ecosystem_domains.json b/src/data/ecosystem_domains.json new file 
mode 100644 index 0000000..d7aad0d --- /dev/null +++ b/src/data/ecosystem_domains.json @@ -0,0 +1,251 @@ +{ + "bazel": ["releases.bazel.build", "mirror.bazel.build", "bcr.bazel.build", "blog.bazel.build"], + "chrome": ["*.google.com", "*.googleapis.com", "*.gvt1.com"], + "clojure": ["repo.clojars.org", "clojars.org"], + "containers": ["ghcr.io", "registry.hub.docker.com", "*.docker.io", "*.docker.com", "production.cloudflare.docker.com", "dl.k8s.io", "pkgs.k8s.io", "quay.io", "mcr.microsoft.com", "gcr.io", "auth.docker.io"], + "dart": ["pub.dev", "pub.dartlang.org", "storage.googleapis.com"], + "defaults": [ + "crl3.digicert.com", + "crl4.digicert.com", + "ocsp.digicert.com", + "ts-crl.ws.symantec.com", + "ts-ocsp.ws.symantec.com", + "crl.geotrust.com", + "ocsp.geotrust.com", + "crl.thawte.com", + "ocsp.thawte.com", + "crl.verisign.com", + "ocsp.verisign.com", + "crl.globalsign.com", + "ocsp.globalsign.com", + "crls.ssl.com", + "ocsp.ssl.com", + "crl.identrust.com", + "ocsp.identrust.com", + "crl.sectigo.com", + "ocsp.sectigo.com", + "crl.usertrust.com", + "ocsp.usertrust.com", + "s.symcb.com", + "s.symcd.com", + "json-schema.org", + "json.schemastore.org", + "archive.ubuntu.com", + "security.ubuntu.com", + "ppa.launchpad.net", + "keyserver.ubuntu.com", + "azure.archive.ubuntu.com", + "api.snapcraft.io", + "packagecloud.io", + "packages.cloud.google.com", + "packages.microsoft.com", + "www.googleapis.com" + ], + "deno": ["deno.land", "jsr.io", "googleapis.deno.dev", "fresh.deno.dev"], + "dev-tools": [ + "app.renovatebot.com", + "appveyor.com", + "badgen.net", + "circleci.com", + "codacy.com", + "codeclimate.com", + "codecov.io", + "coveralls.io", + "deepsource.io", + "drone.io", + "img.shields.io", + "readthedocs.io", + "readthedocs.org", + "renovatebot.com", + "semaphoreci.com", + "shields.io", + "snyk.io", + "sonarcloud.io", + "sonarqube.com", + "travis-ci.com" + ], + "dotnet": [ + "nuget.org", + "dist.nuget.org", + "api.nuget.org", + "nuget.pkg.github.com", + 
"dotnet.microsoft.com", + "pkgs.dev.azure.com", + "builds.dotnet.microsoft.com", + "dotnetcli.blob.core.windows.net", + "nugetregistryv2prod.blob.core.windows.net", + "azuresearch-usnc.nuget.org", + "azuresearch-ussc.nuget.org", + "dc.services.visualstudio.com", + "dot.net", + "ci.dot.net", + "www.microsoft.com", + "oneocsp.microsoft.com", + "*.vsblob.vsassets.io" + ], + "elixir": ["hex.pm", "repo.hex.pm", "builds.hex.pm", "cdn.hex.pm", "fastly.hex.pm"], + "fonts": ["fonts.googleapis.com", "fonts.gstatic.com"], + "github": [ + "*.githubusercontent.com", + "codeload.github.com", + "docs.github.com", + "github-cloud.githubusercontent.com", + "github-cloud.s3.amazonaws.com", + "github.blog", + "github.com", + "github.githubassets.com", + "lfs.github.com", + "objects.githubusercontent.com", + "raw.githubusercontent.com" + ], + "github-actions": [ + "productionresultssa0.blob.core.windows.net", + "productionresultssa1.blob.core.windows.net", + "productionresultssa2.blob.core.windows.net", + "productionresultssa3.blob.core.windows.net", + "productionresultssa4.blob.core.windows.net", + "productionresultssa5.blob.core.windows.net", + "productionresultssa6.blob.core.windows.net", + "productionresultssa7.blob.core.windows.net", + "productionresultssa8.blob.core.windows.net", + "productionresultssa9.blob.core.windows.net", + "productionresultssa10.blob.core.windows.net", + "productionresultssa11.blob.core.windows.net", + "productionresultssa12.blob.core.windows.net", + "productionresultssa13.blob.core.windows.net", + "productionresultssa14.blob.core.windows.net", + "productionresultssa15.blob.core.windows.net", + "productionresultssa16.blob.core.windows.net", + "productionresultssa17.blob.core.windows.net", + "productionresultssa18.blob.core.windows.net", + "productionresultssa19.blob.core.windows.net" + ], + "go": ["go.dev", "golang.org", "proxy.golang.org", "sum.golang.org", "pkg.go.dev", "goproxy.io", "storage.googleapis.com"], + "haskell": ["haskell.org", 
"*.hackage.haskell.org", "get-ghcup.haskell.org", "downloads.haskell.org"], + "java": [ + "www.java.com", + "jdk.java.net", + "api.adoptium.net", + "adoptium.net", + "repo.maven.apache.org", + "maven.apache.org", + "repo1.maven.org", + "maven.pkg.github.com", + "maven.oracle.com", + "repo.spring.io", + "gradle.org", + "services.gradle.org", + "plugins.gradle.org", + "plugins-artifacts.gradle.org", + "repo.grails.org", + "download.eclipse.org", + "download.oracle.com", + "jcenter.bintray.com", + "dlcdn.apache.org", + "archive.apache.org", + "download.java.net", + "api.foojay.io", + "cdn.azul.com", + "central.sonatype.com", + "maven.google.com", + "dl.google.com", + "repo.gradle.org", + "downloads.gradle-dn.com", + "develocity.apache.org", + "scans-in.gradle.com", + "ge.spockframework.org", + "*.gradle-enterprise.cloud", + "maven-central.storage-download.googleapis.com", + "repository.apache.org" + ], + "julia": ["pkg.julialang.org", "*.pkg.julialang.org", "julialang.org", "julialang-s3.julialang.org", "storage.julialang.net"], + "kotlin": ["download.jetbrains.com", "ge.jetbrains.com", "packages.jetbrains.team", "kotlin.bintray.com", "maven.pkg.jetbrains.space"], + "linux-distros": [ + "deb.debian.org", + "security.debian.org", + "keyring.debian.org", + "packages.debian.org", + "debian.map.fastlydns.net", + "apt.llvm.org", + "dl.fedoraproject.org", + "mirrors.fedoraproject.org", + "download.fedoraproject.org", + "mirror.centos.org", + "vault.centos.org", + "dl-cdn.alpinelinux.org", + "pkg.alpinelinux.org", + "mirror.archlinux.org", + "archlinux.org", + "download.opensuse.org", + "cdn.redhat.com" + ], + "local": ["127.0.0.1", "::1", "localhost"], + "lua": ["luarocks.org", "www.luarocks.org"], + "node": [ + "npmjs.org", + "npmjs.com", + "www.npmjs.com", + "www.npmjs.org", + "registry.npmjs.com", + "registry.npmjs.org", + "skimdb.npmjs.com", + "npm.pkg.github.com", + "api.npms.io", + "nodejs.org", + "yarnpkg.com", + "registry.yarnpkg.com", + "repo.yarnpkg.com", + 
"deb.nodesource.com", + "get.pnpm.io", + "bun.sh", + "deno.land", + "jsr.io", + "registry.bower.io", + "esm.sh", + "googleapis.deno.dev", + "googlechromelabs.github.io", + "storage.googleapis.com", + "cdn.jsdelivr.net", + "telemetry.vercel.com" + ], + "node-cdns": ["cdn.jsdelivr.net", "data.jsdelivr.com", "code.jquery.com", "cdn.sheetjs.com"], + "ocaml": ["opam.ocaml.org", "ocaml.org", "erratique.ch"], + "perl": ["cpan.org", "www.cpan.org", "metacpan.org", "cpan.metacpan.org"], + "php": ["repo.packagist.org", "packagist.org", "getcomposer.org", "bitbucket.org"], + "playwright": ["playwright.download.prss.microsoft.com", "cdn.playwright.dev"], + "powershell": ["powershellgallery.com", "www.powershellgallery.com"], + "python": [ + "pypi.python.org", + "pypi.org", + "pip.pypa.io", + "*.pythonhosted.org", + "files.pythonhosted.org", + "bootstrap.pypa.io", + "conda.binstar.org", + "conda.anaconda.org", + "binstar.org", + "anaconda.org", + "repo.continuum.io", + "repo.anaconda.com", + "crates.io", + "index.crates.io", + "static.crates.io" + ], + "r": ["cloud.r-project.org", "cran.r-project.org", "cran.rstudio.com", "r-project.org"], + "ruby": ["rubygems.org", "api.rubygems.org", "rubygems.pkg.github.com", "bundler.rubygems.org", "gems.rubyforge.org", "gems.rubyonrails.org", "index.rubygems.org", "cache.ruby-lang.org", "*.rvm.io"], + "rust": ["crates.io", "index.crates.io", "static.crates.io", "sh.rustup.rs", "static.rust-lang.org"], + "scala": ["repo.scala-sbt.org", "scala-ci.typesafe.com", "repo.typesafe.com", "jitpack.io", "dl.bintray.com", "scala.jfrog.io"], + "swift": ["download.swift.org", "swift.org", "cocoapods.org", "cdn.cocoapods.org"], + "terraform": ["releases.hashicorp.com", "apt.releases.hashicorp.com", "yum.releases.hashicorp.com", "registry.terraform.io"], + "threat-detection": [ + "api.business.githubcopilot.com", + "api.enterprise.githubcopilot.com", + "api.github.com", + "api.githubcopilot.com", + "api.individual.githubcopilot.com", + "github.com", + 
"host.docker.internal", + "telemetry.enterprise.githubcopilot.com" + ], + "zig": ["ziglang.org", "pkg.machengine.org", "deps.files.ghostty.org"] +} diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs new file mode 100644 index 0000000..7666d79 --- /dev/null +++ b/src/ecosystem_domains.rs @@ -0,0 +1,189 @@ +//! Ecosystem domain allowlists for network isolation. +//! +//! This module loads ecosystem-specific domain lists from an embedded JSON file +//! sourced from [gh-aw](https://github.com/github/gh-aw). The JSON maps ecosystem +//! identifiers (e.g., `"python"`, `"rust"`, `"node"`) to arrays of domains that +//! those ecosystems require for package management, registry access, etc. +//! +//! Users reference these identifiers in the `network.allow` front matter field +//! instead of listing individual domains: +//! +//! ```yaml +//! network: +//! allowed: +//! - python +//! - rust +//! - "api.custom.com" +//! ``` + +use std::collections::{HashMap, HashSet}; +use std::sync::LazyLock; + +/// Embedded ecosystem domains JSON, sourced from gh-aw. +static ECOSYSTEM_JSON: &str = include_str!("data/ecosystem_domains.json"); + +/// Parsed ecosystem domain map, loaded once at first access. +static ECOSYSTEM_DOMAINS: LazyLock>> = LazyLock::new(|| { + serde_json::from_str(ECOSYSTEM_JSON).expect("embedded ecosystem_domains.json is invalid") +}); + +/// Compound ecosystems that expand to the union of multiple component ecosystems. +/// Mirrors gh-aw's `compoundEcosystems` mapping. +static COMPOUND_ECOSYSTEMS: LazyLock>> = + LazyLock::new(|| { + HashMap::from([( + "default-safe-outputs", + vec!["defaults", "dev-tools", "github", "local"], + )]) + }); + +/// Returns the domains for a given ecosystem identifier. +/// +/// Supports both direct ecosystem names (e.g., `"python"`) and compound +/// identifiers (e.g., `"default-safe-outputs"` which expands to +/// `defaults + dev-tools + github + local`). +/// +/// Returns an empty `Vec` if the identifier is unknown. 
+pub fn get_ecosystem_domains(identifier: &str) -> Vec<String> { + // Check compound ecosystems first + if let Some(components) = COMPOUND_ECOSYSTEMS.get(identifier) { + let mut domains: HashSet<String> = HashSet::new(); + for component in components { + for d in get_ecosystem_domains(component) { + domains.insert(d); + } + } + let mut result: Vec<String> = domains.into_iter().collect(); + result.sort(); + return result; + } + + ECOSYSTEM_DOMAINS + .get(identifier) + .cloned() + .unwrap_or_default() +} + +/// Returns `true` if the identifier is a known ecosystem name +/// (either a direct key in the JSON or a compound identifier). +pub fn is_known_ecosystem(identifier: &str) -> bool { + ECOSYSTEM_DOMAINS.contains_key(identifier) || COMPOUND_ECOSYSTEMS.contains_key(identifier) +} + +/// Returns the sorted list of all known ecosystem names +/// (both direct and compound). +#[cfg(test)] +pub fn known_ecosystem_names() -> Vec<String> { + let mut names: Vec<String> = ECOSYSTEM_DOMAINS + .keys() + .cloned() + .chain(COMPOUND_ECOSYSTEMS.keys().map(|k| k.to_string())) + .collect(); + names.sort(); + names.dedup(); + names +} + +/// Heuristic: ecosystem identifiers are composed of lowercase ASCII letters, +/// digits, and hyphens (e.g., `"python"`, `"linux-distros"`, `"default-safe-outputs"`). +/// Domain names contain dots (e.g., `"pypi.org"`, `"*.example.com"`). +/// Strings with spaces, special characters, or other unexpected content are +/// treated as neither — they fall through to domain validation which will reject them. 
+pub fn is_ecosystem_identifier(value: &str) -> bool { + !value.is_empty() + && !value.contains('.') + && value + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-') +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_known_ecosystems_loaded() { + let names = known_ecosystem_names(); + assert!(names.contains(&"python".to_string())); + assert!(names.contains(&"rust".to_string())); + assert!(names.contains(&"node".to_string())); + assert!(names.contains(&"go".to_string())); + assert!(names.contains(&"defaults".to_string())); + assert!(names.len() > 20, "expected 20+ ecosystems, got {}", names.len()); + } + + #[test] + fn test_get_python_domains() { + let domains = get_ecosystem_domains("python"); + assert!(domains.contains(&"pypi.org".to_string())); + assert!(domains.contains(&"pip.pypa.io".to_string())); + assert!(!domains.is_empty()); + } + + #[test] + fn test_get_rust_domains() { + let domains = get_ecosystem_domains("rust"); + assert!(domains.contains(&"crates.io".to_string())); + assert!(domains.contains(&"static.rust-lang.org".to_string())); + } + + #[test] + fn test_get_node_domains() { + let domains = get_ecosystem_domains("node"); + assert!(domains.contains(&"registry.npmjs.org".to_string())); + assert!(domains.contains(&"nodejs.org".to_string())); + } + + #[test] + fn test_unknown_ecosystem_returns_empty() { + let domains = get_ecosystem_domains("nonexistent-ecosystem"); + assert!(domains.is_empty()); + } + + #[test] + fn test_is_known_ecosystem() { + assert!(is_known_ecosystem("python")); + assert!(is_known_ecosystem("rust")); + assert!(is_known_ecosystem("default-safe-outputs")); + assert!(!is_known_ecosystem("nonexistent")); + } + + #[test] + fn test_compound_ecosystem() { + let domains = get_ecosystem_domains("default-safe-outputs"); + assert!(!domains.is_empty()); + // Should include domains from defaults, dev-tools, github, local + assert!(domains.contains(&"github.com".to_string()), "should include github 
domains"); + assert!(domains.contains(&"localhost".to_string()), "should include local domains"); + } + + #[test] + fn test_is_ecosystem_identifier_heuristic() { + // Ecosystem identifiers (lowercase + hyphens) + assert!(is_ecosystem_identifier("python")); + assert!(is_ecosystem_identifier("rust")); + assert!(is_ecosystem_identifier("node")); + assert!(is_ecosystem_identifier("default-safe-outputs")); + assert!(is_ecosystem_identifier("linux-distros")); + + // Domain names (have dots) + assert!(!is_ecosystem_identifier("pypi.org")); + assert!(!is_ecosystem_identifier("*.example.com")); + assert!(!is_ecosystem_identifier("api.github.com")); + + // Invalid strings (special chars, spaces, uppercase) + assert!(!is_ecosystem_identifier("")); + assert!(!is_ecosystem_identifier("bad host!")); + assert!(!is_ecosystem_identifier("PYTHON")); + assert!(!is_ecosystem_identifier("hello world")); + } + + #[test] + fn test_defaults_ecosystem_has_expected_entries() { + let domains = get_ecosystem_domains("defaults"); + // Certificate infrastructure + assert!(domains.contains(&"ocsp.digicert.com".to_string())); + // Ubuntu + assert!(domains.contains(&"archive.ubuntu.com".to_string())); + } +} diff --git a/src/main.rs b/src/main.rs index 7a213f6..a03518b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod allowed_hosts; mod compile; mod configure; mod detect; +mod ecosystem_domains; mod execute; mod fuzzy_schedule; mod init; From 5b6fda4984a3b4dd3c7f0c86d577d83a1293dec1 Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:21:52 +0100 Subject: [PATCH 02/12] refactor: consolidate Lean hosts into ecosystem_domains.json Move Lean runtime domains from hardcoded LEAN_REQUIRED_HOSTS constant into ecosystem_domains.json. The Lean extension now returns the ecosystem identifier 'lean' from required_hosts(), and generate_allowed_domains() resolves it via the JSON like any other ecosystem. Extension hosts now support ecosystem identifiers too. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/allowed_hosts.rs | 9 +++++---- src/compile/extensions.rs | 10 +++++----- src/compile/standalone.rs | 12 ++++++++++-- src/data/ecosystem_domains.json | 1 + 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/allowed_hosts.rs b/src/allowed_hosts.rs index 84b886f..7e8f5c6 100644 --- a/src/allowed_hosts.rs +++ b/src/allowed_hosts.rs @@ -142,9 +142,10 @@ mod tests { #[test] fn test_lean_hosts() { - use crate::runtimes::lean::LEAN_REQUIRED_HOSTS; - assert!(LEAN_REQUIRED_HOSTS.contains(&"elan.lean-lang.org")); - assert!(LEAN_REQUIRED_HOSTS.contains(&"leanprover.github.io")); - assert!(LEAN_REQUIRED_HOSTS.contains(&"lean-lang.org")); + use crate::ecosystem_domains::get_ecosystem_domains; + let lean_hosts = get_ecosystem_domains("lean"); + assert!(lean_hosts.contains(&"elan.lean-lang.org".to_string())); + assert!(lean_hosts.contains(&"leanprover.github.io".to_string())); + assert!(lean_hosts.contains(&"lean-lang.org".to_string())); } } diff --git a/src/compile/extensions.rs b/src/compile/extensions.rs index 3da06f6..33c8702 100644 --- a/src/compile/extensions.rs +++ b/src/compile/extensions.rs @@ -332,7 +332,7 @@ extension_enum! { // ─── Lean 4 ────────────────────────────────────────────────────────── use crate::runtimes::lean::{ - self, LeanRuntimeConfig, LEAN_BASH_COMMANDS, LEAN_REQUIRED_HOSTS, + self, LeanRuntimeConfig, LEAN_BASH_COMMANDS, }; /// Lean 4 runtime extension. 
@@ -359,7 +359,7 @@ impl CompilerExtension for LeanExtension { } fn required_hosts(&self) -> Vec { - LEAN_REQUIRED_HOSTS.iter().map(|h| (*h).to_string()).collect() + vec!["lean".to_string()] } fn required_bash_commands(&self) -> Vec { @@ -857,9 +857,9 @@ mod tests { fn test_lean_required_hosts() { let ext = LeanExtension::new(LeanRuntimeConfig::Enabled(true)); let hosts = ext.required_hosts(); - assert!(hosts.contains(&"elan.lean-lang.org".to_string())); - assert!(hosts.contains(&"leanprover.github.io".to_string())); - assert!(hosts.contains(&"lean-lang.org".to_string())); + // Lean extension returns the ecosystem identifier; domain expansion + // happens in generate_allowed_domains(). + assert_eq!(hosts, vec!["lean".to_string()]); } #[test] diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index e5e786d..92f3551 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -307,10 +307,18 @@ fn generate_allowed_domains( } } - // Add extension-declared hosts (runtimes + first-party tools) + // Add extension-declared hosts (runtimes + first-party tools). + // Extensions may return ecosystem identifiers (e.g., "lean") which are + // expanded to their domain lists, or raw domain names. 
for ext in extensions { for host in ext.required_hosts() { - hosts.insert(host); + if is_ecosystem_identifier(&host) { + for domain in get_ecosystem_domains(&host) { + hosts.insert(domain); + } + } else { + hosts.insert(host); + } } } diff --git a/src/data/ecosystem_domains.json b/src/data/ecosystem_domains.json index d7aad0d..7d0e977 100644 --- a/src/data/ecosystem_domains.json +++ b/src/data/ecosystem_domains.json @@ -160,6 +160,7 @@ ], "julia": ["pkg.julialang.org", "*.pkg.julialang.org", "julialang.org", "julialang-s3.julialang.org", "storage.julialang.net"], "kotlin": ["download.jetbrains.com", "ge.jetbrains.com", "packages.jetbrains.team", "kotlin.bintray.com", "maven.pkg.jetbrains.space"], + "lean": ["elan.lean-lang.org", "leanprover.github.io", "lean-lang.org"], "linux-distros": [ "deb.debian.org", "security.debian.org", From c5ee2339547039041e455c7c5cda0ed6fa89db9d Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:25:50 +0100 Subject: [PATCH 03/12] feat: add reservoir and static lean-lang domains to lean ecosystem Add reservoir.lean-lang.org (Lake package registry) and static.lean-lang.org (toolchain binary downloads) to the lean ecosystem entry. Update LEAN_REQUIRED_HOSTS constant to match. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/data/ecosystem_domains.json | 2 +- src/runtimes/lean.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/data/ecosystem_domains.json b/src/data/ecosystem_domains.json index 7d0e977..f7fb171 100644 --- a/src/data/ecosystem_domains.json +++ b/src/data/ecosystem_domains.json @@ -160,7 +160,7 @@ ], "julia": ["pkg.julialang.org", "*.pkg.julialang.org", "julialang.org", "julialang-s3.julialang.org", "storage.julialang.net"], "kotlin": ["download.jetbrains.com", "ge.jetbrains.com", "packages.jetbrains.team", "kotlin.bintray.com", "maven.pkg.jetbrains.space"], - "lean": ["elan.lean-lang.org", "leanprover.github.io", "lean-lang.org"], + "lean": ["elan.lean-lang.org", "leanprover.github.io", "lean-lang.org", "reservoir.lean-lang.org", "static.lean-lang.org"], "linux-distros": [ "deb.debian.org", "security.debian.org", diff --git a/src/runtimes/lean.rs b/src/runtimes/lean.rs index e90fe53..e794772 100644 --- a/src/runtimes/lean.rs +++ b/src/runtimes/lean.rs @@ -77,10 +77,15 @@ pub const LEAN_BASH_COMMANDS: &[&str] = &["lean", "lake", "elan"]; /// Network domains required by the Lean runtime (elan installer + toolchain downloads). /// github.com and *.githubusercontent.com are already in CORE_ALLOWED_HOSTS. +/// NOTE: The canonical list is now in `src/data/ecosystem_domains.json` under the "lean" key. +/// The LeanExtension returns "lean" as an ecosystem identifier, so this constant is +/// kept only for reference. Update ecosystem_domains.json instead. pub const LEAN_REQUIRED_HOSTS: &[&str] = &[ "elan.lean-lang.org", "leanprover.github.io", "lean-lang.org", + "reservoir.lean-lang.org", + "static.lean-lang.org", ]; /// Generate the elan installation step for Lean 4. 
From 9d6026f80f675599779a15851a728ce36981e6c2 Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:27:07 +0100 Subject: [PATCH 04/12] refactor: remove unused LEAN_REQUIRED_HOSTS constant The canonical lean domain list is now in ecosystem_domains.json. The LeanExtension returns the "lean" ecosystem identifier, making this constant dead code. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/runtimes/lean.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/runtimes/lean.rs b/src/runtimes/lean.rs index e794772..8a01a6b 100644 --- a/src/runtimes/lean.rs +++ b/src/runtimes/lean.rs @@ -75,19 +75,6 @@ pub struct LeanOptions { /// Bash commands that the Lean runtime adds to the allow-list. pub const LEAN_BASH_COMMANDS: &[&str] = &["lean", "lake", "elan"]; -/// Network domains required by the Lean runtime (elan installer + toolchain downloads). -/// github.com and *.githubusercontent.com are already in CORE_ALLOWED_HOSTS. -/// NOTE: The canonical list is now in `src/data/ecosystem_domains.json` under the "lean" key. -/// The LeanExtension returns "lean" as an ecosystem identifier, so this constant is -/// kept only for reference. Update ecosystem_domains.json instead. -pub const LEAN_REQUIRED_HOSTS: &[&str] = &[ - "elan.lean-lang.org", - "leanprover.github.io", - "lean-lang.org", - "reservoir.lean-lang.org", - "static.lean-lang.org", -]; - /// Generate the elan installation step for Lean 4. /// /// Installs elan (Lean toolchain manager) and the specified toolchain. From c38aa128f16b9057e152c7853fb6a8b62021878e Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:30:15 +0100 Subject: [PATCH 05/12] fix: preserve ado-aw-specific entries during ecosystem JSON sync The sync workflow previously replaced the local file with upstream verbatim, which would delete ado-aw-specific entries like 'lean'. Updated instructions to merge upstream changes while preserving local-only keys. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/update-awf-version.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/update-awf-version.md b/.github/workflows/update-awf-version.md index 36f94da..618be13 100644 --- a/.github/workflows/update-awf-version.md +++ b/.github/workflows/update-awf-version.md @@ -129,17 +129,24 @@ Read the file `pkg/workflow/data/ecosystem_domains.json` from the `main` branch Read `src/data/ecosystem_domains.json` in this repository. -### Step 3: Compare Content +### Step 3: Merge and Compare -Compare the upstream and local files. If they are identical, **skip** — the file is up to date. +Our local file may contain **additional entries** that do not exist upstream (e.g., `"lean"`). These are ado-aw-specific additions and must be preserved. + +Merge the two files as follows: +- Start with all entries from the **upstream** file (updating any existing keys to match upstream values). +- **Add back** any keys that exist in the local file but **not** in the upstream file. These are ado-aw-specific entries. +- Maintain alphabetical key ordering in the final JSON. + +If the merged result is identical to the current local file, **skip** — everything is up to date. Before proceeding, also check whether a PR already exists with the title `chore: sync ecosystem_domains.json from gh-aw`. If one is already open, **skip** to avoid duplicates. ### Step 4: Create a Sync PR -If the files differ: +If the merged result differs from the current local file: -1. Replace the contents of `src/data/ecosystem_domains.json` with the upstream version exactly as-is (preserve formatting). +1. Write the merged JSON to `src/data/ecosystem_domains.json` (preserve 2-space indentation, one key per line, trailing newline). 2. 
Create a pull request: @@ -148,7 +155,9 @@ If the files differ: ```markdown ## Ecosystem Domains Sync - Updates `src/data/ecosystem_domains.json` to match the upstream source at [`github/gh-aw/pkg/workflow/data/ecosystem_domains.json`](https://github.com/github/gh-aw/blob/main/pkg/workflow/data/ecosystem_domains.json). + Merges upstream changes from [`github/gh-aw/pkg/workflow/data/ecosystem_domains.json`](https://github.com/github/gh-aw/blob/main/pkg/workflow/data/ecosystem_domains.json) into `src/data/ecosystem_domains.json`. + + This sync preserves any ado-aw-specific entries (keys not present upstream) while updating all shared entries to match the upstream source. This file defines the domain allowlists for ecosystem identifiers (e.g., `python`, `rust`, `node`) used in the `network.allow` front matter field. From c772e000580c05ee894e61d9dff866e99b32b2c0 Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:51:01 +0100 Subject: [PATCH 06/12] refactor: rename network.allow to network.allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align field names with gh-aw conventions by renaming: - network.allow → network.allowed The `network.blocked` field already matches gh-aw and is unchanged. Updates front matter parsing, compiler, tests, documentation, and all prompt/agent files to use the new field name. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/agents/agentic-workflows.agent.md | 2 +- .github/workflows/update-awf-version.md | 2 +- AGENTS.md | 14 +++++------ README.md | 6 ++--- prompts/create-ado-agentic-workflow.md | 2 +- prompts/debug-ado-agentic-workflow.md | 10 ++++---- prompts/update-ado-agentic-workflow.md | 4 +-- src/compile/standalone.rs | 28 ++++++++++----------- src/compile/types.rs | 4 +-- src/ecosystem_domains.rs | 2 +- tests/compiler_tests.rs | 30 +++++++++++------------ tests/fixtures/azure-devops-mcp-agent.md | 2 +- 12 files changed, 53 insertions(+), 53 deletions(-) diff --git a/.github/agents/agentic-workflows.agent.md b/.github/agents/agentic-workflows.agent.md index 776cdea..d94ac15 100644 --- a/.github/agents/agentic-workflows.agent.md +++ b/.github/agents/agentic-workflows.agent.md @@ -97,5 +97,5 @@ ado-aw check my-agent.yml - Agent files must be compiled with `ado-aw compile` after frontmatter changes - Markdown body changes don't require recompilation -- Follow security best practices: minimal permissions, explicit `network.allow`, scoped service connections +- Follow security best practices: minimal permissions, explicit `network.allowed`, scoped service connections - Reference full docs at the [AGENTS.md](../../AGENTS.md) in this repo diff --git a/.github/workflows/update-awf-version.md b/.github/workflows/update-awf-version.md index 618be13..6a68205 100644 --- a/.github/workflows/update-awf-version.md +++ b/.github/workflows/update-awf-version.md @@ -159,7 +159,7 @@ If the merged result differs from the current local file: This sync preserves any ado-aw-specific entries (keys not present upstream) while updating all shared entries to match the upstream source. - This file defines the domain allowlists for ecosystem identifiers (e.g., `python`, `rust`, `node`) used in the `network.allow` front matter field. 
+ This file defines the domain allowlists for ecosystem identifiers (e.g., `python`, `rust`, `node`) used in the `network.allowed` front matter field. --- *This PR was opened automatically by the dependency version updater workflow.* diff --git a/AGENTS.md b/AGENTS.md index 6d6c77c..58ed091 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -193,7 +193,7 @@ teardown: # separate job AFTER safe outputs processing - bash: echo "Teardown job step" displayName: "Teardown step" network: # optional network policy (standalone target only) - allow: # allowed host patterns and/or ecosystem identifiers + allowed: # allowed host patterns and/or ecosystem identifiers - python # ecosystem identifier — expands to Python/PyPI domains - "*.mycompany.com" # raw domain pattern blocked: # blocked host patterns or ecosystems (removes from allow list) @@ -788,8 +788,8 @@ If no passthrough env vars are needed, this marker is replaced with an empty str Should be replaced with the comma-separated domain list for AWF's `--allow-domains` flag. The list includes: 1. Core Azure DevOps/GitHub endpoints (from `allowed_hosts.rs`) 2. MCP-specific endpoints for each enabled MCP -3. Ecosystem identifier expansions from `network.allow:` (e.g., `python` → PyPI/pip domains) -4. User-specified additional hosts from `network.allow:` front matter +3. Ecosystem identifier expansions from `network.allowed:` (e.g., `python` → PyPI/pip domains) +4. User-specified additional hosts from `network.allowed:` front matter The output is formatted as a comma-separated string (e.g., `github.com,*.dev.azure.com,api.github.com`). @@ -1608,7 +1608,7 @@ mcp-servers: permissions: read: my-read-arm-connection network: - allow: + allowed: - "dev.azure.com" - "*.dev.azure.com" ``` @@ -1619,7 +1619,7 @@ network: 2. **Containerization**: Stdio MCP servers run as isolated Docker containers (per MCPG spec §3.2.1) 3. **Environment Isolation**: MCP containers are spawned by MCPG with only the configured environment variables 4. 
**MCPG Gateway**: All MCP traffic flows through the MCP Gateway which enforces tool-level filtering -5. **Network Isolation**: MCP containers run within the same AWF-isolated network. Users must explicitly allow external domains via `network.allow` +5. **Network Isolation**: MCP containers run within the same AWF-isolated network. Users must explicitly allow external domains via `network.allowed` ## Network Isolation (AWF) @@ -1673,7 +1673,7 @@ Agents can specify additional allowed hosts in their front matter using either e ```yaml network: - allow: + allowed: - python # Ecosystem identifier — expands to Python/PyPI domains - rust # Ecosystem identifier — expands to Rust/crates.io domains - "*.mycompany.com" # Raw domain pattern @@ -1716,7 +1716,7 @@ The `network.blocked` field removes hosts from the combined allowlist. Both ecos ```yaml network: - allow: + allowed: - python - node blocked: diff --git a/README.md b/README.md index eb7ded7..68b06d7 100644 --- a/README.md +++ b/README.md @@ -337,7 +337,7 @@ mcp-servers: ``` Custom MCP containers run inside the AWF network sandbox. Add any required -external domains to `network.allow`. +external domains to `network.allowed`. --- @@ -410,12 +410,12 @@ reachable. The allowlist is built from: 1. **Core domains** — Azure DevOps, GitHub, Microsoft auth, Azure storage 2. **MCP domains** — automatically added per enabled MCP -3. **User domains** — from `network.allow` in front matter +3. **User domains** — from `network.allowed` in front matter 4. 
**Minus blocked** — `network.blocked` entries are removed by exact match (wildcard patterns like `*.example.com` are not affected by blocking a specific subdomain) ```yaml network: - allow: + allowed: - "*.mycompany.com" - "api.external-service.com" blocked: diff --git a/prompts/create-ado-agentic-workflow.md b/prompts/create-ado-agentic-workflow.md index ed4234b..efd370b 100644 --- a/prompts/create-ado-agentic-workflow.md +++ b/prompts/create-ado-agentic-workflow.md @@ -328,7 +328,7 @@ teardown: # Separate job AFTER ProcessSafeOutputs Additional allowed domains beyond the built-in allowlist: ```yaml network: - allow: + allowed: - "*.mycompany.com" - "api.external-service.com" blocked: diff --git a/prompts/debug-ado-agentic-workflow.md b/prompts/debug-ado-agentic-workflow.md index 183e09f..c2d6863 100644 --- a/prompts/debug-ado-agentic-workflow.md +++ b/prompts/debug-ado-agentic-workflow.md @@ -67,20 +67,20 @@ This is the most complex stage — it involves downloading binaries, starting Do | Error Pattern | Likely Cause | Fix | |---------------|-------------|-----| -| `503 Service Unavailable` from Squid | Domain not in allowlist | Add domain to `network.allow` in front matter | +| `503 Service Unavailable` from Squid | Domain not in allowlist | Add domain to `network.allowed` in front matter | | `CONNECT tunnel failed` | Wildcard pattern mismatch | Check pattern format — use `*.example.com` not `example.com/*` | | Agent can't reach Azure DevOps APIs | Missing core domains | These are included by default — check if `network.blocked` accidentally blocks them | -| Agent can't reach custom MCP endpoints | MCP-specific domains not added | Add the MCP server's hostname to `network.allow` | +| Agent can't reach custom MCP endpoints | MCP-specific domains not added | Add the MCP server's hostname to `network.allowed` | **Checking the allowlist**: The compiler merges three domain sources: 1. Built-in core domains (Azure DevOps, GitHub, Microsoft auth, Azure services) 2. 
MCP-specific domains (auto-added per enabled MCP) -3. User-specified domains from `network.allow` +3. User-specified domains from `network.allowed` If the agent needs to reach `api.myservice.com`, add it: ```yaml network: - allow: + allowed: - "api.myservice.com" - "*.myservice.com" # if subdomains are also needed ``` @@ -362,7 +362,7 @@ Use this checklist to systematically rule out common issues: - [ ] **Compilation in sync**: `ado-aw check ` passes - [ ] **Correct stage identified**: Know which of the 3 jobs failed -- [ ] **Network allowlist**: All required domains are in `network.allow` or built-in +- [ ] **Network allowlist**: All required domains are in `network.allowed` or built-in - [ ] **MCP tools allowed**: Every tool the agent needs is in an `allowed:` list - [ ] **Permissions set**: `permissions.write` is present if write safe-outputs are configured - [ ] **Service connections authorized**: ARM connections are permitted for this pipeline diff --git a/prompts/update-ado-agentic-workflow.md b/prompts/update-ado-agentic-workflow.md index bc08bca..52eb483 100644 --- a/prompts/update-ado-agentic-workflow.md +++ b/prompts/update-ado-agentic-workflow.md @@ -175,7 +175,7 @@ mcp-servers: - get_status ``` -Custom MCPs **must** have an explicit `allowed:` list. Add any required external domains to `network.allow`. +Custom MCPs **must** have an explicit `allowed:` list. Add any required external domains to `network.allowed`. ### Adding Permissions @@ -248,7 +248,7 @@ Before finalizing any update, verify: 5. **Workspace consistency**: If `workspace: repo` is set, ensure `checkout:` has additional repositories. If only `self` is checked out, `workspace: repo` is unnecessary (the compiler warns about this). -6. **Network domains**: If new MCPs or external services are added, ensure required domains are in `network.allow`. +6. **Network domains**: If new MCPs or external services are added, ensure required domains are in `network.allowed`. 7. 
**Target compatibility**: If `target: 1es`, custom MCPs (with `container:`) are not supported — only built-in MCPs with service connections. diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 92f3551..52f781f 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -262,7 +262,7 @@ impl Compiler for StandaloneCompiler { /// `--allow-domains` flag. The list includes: /// 1. Core Azure DevOps/GitHub endpoints /// 2. MCP-specific endpoints for each enabled MCP -/// 3. User-specified additional hosts from network.allow +/// 3. User-specified additional hosts from network.allowed fn generate_allowed_domains( front_matter: &FrontMatter, extensions: &[super::extensions::Extension], @@ -284,7 +284,7 @@ fn generate_allowed_domains( let user_hosts: Vec = front_matter .network .as_ref() - .map(|n| n.allow.clone()) + .map(|n| n.allowed.clone()) .unwrap_or_default(); // Generate the allowlist by combining core + MCP + extension + user hosts @@ -330,7 +330,7 @@ fn generate_allowed_domains( let domains = get_ecosystem_domains(host); if domains.is_empty() && !is_known_ecosystem(host) { eprintln!( - "warning: network.allow contains unknown ecosystem identifier '{}'. \ + "warning: network.allowed contains unknown ecosystem identifier '{}'. \ Known ecosystems: python, rust, node, go, java, etc. \ If this is a domain name, it should contain a dot.", host @@ -348,14 +348,14 @@ fn generate_allowed_domains( .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); if !valid_chars { anyhow::bail!( - "network.allow domain '{}' contains characters invalid in DNS names. \ + "network.allowed domain '{}' contains characters invalid in DNS names. \ Only ASCII alphanumerics, '.', '-', and '*' are allowed.", host ); } if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { anyhow::bail!( - "network.allow domain '{}' uses '*' in an unsupported position. \ + "network.allowed domain '{}' uses '*' in an unsupported position. 
\ Wildcards must appear only as a leading prefix (e.g. '*.example.com').", host ); @@ -1815,7 +1815,7 @@ mod tests { fn test_generate_allowed_domains_blocked_takes_precedence_over_allow() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["evil.example.com".to_string()], + allowed: vec!["evil.example.com".to_string()], blocked: vec!["evil.example.com".to_string()], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1841,7 +1841,7 @@ mod tests { fn test_generate_allowed_domains_user_allow_host_included() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["api.mycompany.com".to_string()], + allowed: vec!["api.mycompany.com".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1859,7 +1859,7 @@ mod tests { // also remove wildcard variants like "*.github.com". This is intentional. let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec![], + allowed: vec![], blocked: vec!["github.com".to_string()], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1875,7 +1875,7 @@ mod tests { fn test_generate_allowed_domains_invalid_host_returns_error() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["bad host!".to_string()], + allowed: vec!["bad host!".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1913,7 +1913,7 @@ mod tests { fn test_generate_allowed_domains_ecosystem_python_expands() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["python".to_string()], + allowed: vec!["python".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1926,7 +1926,7 @@ mod tests { fn 
test_generate_allowed_domains_ecosystem_rust_expands() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["rust".to_string()], + allowed: vec!["rust".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1939,7 +1939,7 @@ mod tests { fn test_generate_allowed_domains_ecosystem_mixed_with_raw_domains() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["python".to_string(), "api.custom.com".to_string()], + allowed: vec!["python".to_string(), "api.custom.com".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1952,7 +1952,7 @@ mod tests { fn test_generate_allowed_domains_ecosystem_blocked_removes_all_ecosystem_domains() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["python".to_string()], + allowed: vec!["python".to_string()], blocked: vec!["python".to_string()], }); let exts = super::super::extensions::collect_extensions(&fm); @@ -1965,7 +1965,7 @@ mod tests { fn test_generate_allowed_domains_multiple_ecosystems() { let mut fm = minimal_front_matter(); fm.network = Some(crate::compile::types::NetworkConfig { - allow: vec!["python".to_string(), "node".to_string(), "rust".to_string()], + allowed: vec!["python".to_string(), "node".to_string(), "rust".to_string()], blocked: vec![], }); let exts = super::super::extensions::collect_extensions(&fm); diff --git a/src/compile/types.rs b/src/compile/types.rs index 38e4c69..dbbe0ef 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -600,8 +600,8 @@ pub struct NetworkConfig { /// Additional allowed host patterns (supports wildcards like *.example.com) /// Core Azure DevOps and GitHub hosts are always allowed. 
#[serde(default)] - pub allow: Vec, - /// Blocked host patterns (takes precedence over allow) + pub allowed: Vec, + /// Blocked host patterns (takes precedence over allowed) #[serde(default)] pub blocked: Vec, } diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs index 7666d79..d30607c 100644 --- a/src/ecosystem_domains.rs +++ b/src/ecosystem_domains.rs @@ -5,7 +5,7 @@ //! identifiers (e.g., `"python"`, `"rust"`, `"node"`) to arrays of domains that //! those ecosystems require for package management, registry access, etc. //! -//! Users reference these identifiers in the `network.allow` front matter field +//! Users reference these identifiers in the `network.allowed` front matter field //! instead of listing individual domains: //! //! ```yaml diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index b19df6a..db499af 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2523,7 +2523,7 @@ tools: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a valid leading wildcard (*.example.com) compiles successfully +/// Test that network.allowed with a valid leading wildcard (*.example.com) compiles successfully #[test] fn test_network_allow_valid_wildcard_compiles() { let temp_dir = std::env::temp_dir().join(format!( @@ -2534,9 +2534,9 @@ fn test_network_allow_valid_wildcard_compiles() { let input = r#"--- name: "Network Wildcard Agent" -description: "Agent with valid leading wildcard in network.allow" +description: "Agent with valid leading wildcard in network.allowed" network: - allow: + allowed: - "*.mycompany.com" - "api.external-service.com" --- @@ -2563,7 +2563,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a trailing wildcard (example.*) fails compilation +/// Test that network.allowed with a trailing wildcard (example.*) fails compilation #[test] fn test_network_allow_trailing_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2574,9 
+2574,9 @@ fn test_network_allow_trailing_wildcard_fails() { let input = r#"--- name: "Network Trailing Wildcard Agent" -description: "Agent with trailing wildcard in network.allow" +description: "Agent with trailing wildcard in network.allowed" network: - allow: + allowed: - "example.*" --- @@ -2607,7 +2607,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a mid-string wildcard (ex*ample.com) fails compilation +/// Test that network.allowed with a mid-string wildcard (ex*ample.com) fails compilation #[test] fn test_network_allow_mid_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2618,9 +2618,9 @@ fn test_network_allow_mid_wildcard_fails() { let input = r#"--- name: "Network Mid Wildcard Agent" -description: "Agent with mid-string wildcard in network.allow" +description: "Agent with mid-string wildcard in network.allowed" network: - allow: + allowed: - "ex*ample.com" --- @@ -2651,7 +2651,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a double wildcard (*.*.com) fails compilation +/// Test that network.allowed with a double wildcard (*.*.com) fails compilation #[test] fn test_network_allow_double_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2662,9 +2662,9 @@ fn test_network_allow_double_wildcard_fails() { let input = r#"--- name: "Network Double Wildcard Agent" -description: "Agent with double wildcard in network.allow" +description: "Agent with double wildcard in network.allowed" network: - allow: + allowed: - "*.*.com" --- @@ -2695,7 +2695,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a bare '*' fails compilation +/// Test that network.allowed with a bare '*' fails compilation #[test] fn test_network_allow_bare_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2706,9 +2706,9 @@ fn test_network_allow_bare_wildcard_fails() { let input = r#"--- name: "Network Bare 
Wildcard Agent" -description: "Agent with bare wildcard in network.allow" +description: "Agent with bare wildcard in network.allowed" network: - allow: + allowed: - "*" --- diff --git a/tests/fixtures/azure-devops-mcp-agent.md b/tests/fixtures/azure-devops-mcp-agent.md index 43e4036..2dc9a4d 100644 --- a/tests/fixtures/azure-devops-mcp-agent.md +++ b/tests/fixtures/azure-devops-mcp-agent.md @@ -20,7 +20,7 @@ safe-outputs: create-work-item: work-item-type: Task network: - allow: + allowed: - "dev.azure.com" - "*.dev.azure.com" --- From 3abf1d550f27e851b1acd5035cc1d822caae378d Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:51:58 +0100 Subject: [PATCH 07/12] chore: regenerate workflows with gh-aw compile Recompile lock files to reflect updated network field names in update-awf-version workflow. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/update-awf-version.lock.yml | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/update-awf-version.lock.yml b/.github/workflows/update-awf-version.lock.yml index 3d85347..f90eae1 100644 --- a/.github/workflows/update-awf-version.lock.yml +++ b/.github/workflows/update-awf-version.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"cb8cbfd520e7a77629b57d7de3c1bf9f73b1ed875bf268c72f055eac67a102f3","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"206b4b4fc88867928fa4ef288b770b224cd48404685eca0c639ddd4db3a72525","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} # gh-aw-manifest: 
{"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_CI_TRIGGER_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc","version":"v0.68.1"}]} # ___ _ _ # / _ \ | | (_) @@ -22,7 +22,7 @@ # # For more information: https://github.github.com/gh-aw/introduction/overview/ # -# Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and opens PRs to update pinned version constants +# Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and syncs ecosystem_domains.json from gh-aw. Opens PRs for any updates found. 
# # Secrets used: # - COPILOT_GITHUB_TOKEN @@ -158,19 +158,19 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' - GH_AW_PROMPT_45895f8f0ea189d4_EOF + GH_AW_PROMPT_168313d4fdec629b_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' - Tools: create_pull_request(max:3), missing_tool, missing_data, noop - GH_AW_PROMPT_45895f8f0ea189d4_EOF + Tools: create_pull_request(max:4), missing_tool, missing_data, noop + GH_AW_PROMPT_168313d4fdec629b_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_create_pull_request.md" - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' The following GitHub context information is available for this workflow: @@ -200,12 +200,12 @@ jobs: {{/if}} - GH_AW_PROMPT_45895f8f0ea189d4_EOF + GH_AW_PROMPT_168313d4fdec629b_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_45895f8f0ea189d4_EOF' + cat << 'GH_AW_PROMPT_168313d4fdec629b_EOF' {{#runtime-import .github/workflows/update-awf-version.md}} - GH_AW_PROMPT_45895f8f0ea189d4_EOF + GH_AW_PROMPT_168313d4fdec629b_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 @@ -371,15 +371,15 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_8ca098a8137f403d_EOF' - 
{"create_pull_request":{"max":3,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"]},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} - GH_AW_SAFE_OUTPUTS_CONFIG_8ca098a8137f403d_EOF + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_0dee48e36c1f8b4f_EOF' + {"create_pull_request":{"max":4,"max_patch_size":1024,"protected_files":["package.json","bun.lockb","bunfig.toml","deno.json","deno.jsonc","deno.lock","global.json","NuGet.Config","Directory.Packages.props","mix.exs","mix.lock","go.mod","go.sum","stack.yaml","stack.yaml.lock","pom.xml","build.gradle","build.gradle.kts","settings.gradle","settings.gradle.kts","gradle.properties","package-lock.json","yarn.lock","pnpm-lock.yaml","npm-shrinkwrap.json","requirements.txt","Pipfile","Pipfile.lock","pyproject.toml","setup.py","setup.cfg","Gemfile","Gemfile.lock","uv.lock","CODEOWNERS"],"protected_path_prefixes":[".github/",".agents/"]},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} + GH_AW_SAFE_OUTPUTS_CONFIG_0dee48e36c1f8b4f_EOF - name: Write Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | { "description_suffixes": { - "create_pull_request": " CONSTRAINTS: Maximum 3 pull request(s) can be created." 
+ "create_pull_request": " CONSTRAINTS: Maximum 4 pull request(s) can be created." }, "repo_params": {}, "dynamic_tools": [] @@ -571,7 +571,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_8fe4e51e6e54b1d2_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" + cat << GH_AW_MCP_CONFIG_3a914e19b96138f1_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" { "mcpServers": { "github": { @@ -612,7 +612,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_8fe4e51e6e54b1d2_EOF + GH_AW_MCP_CONFIG_3a914e19b96138f1_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -1007,7 +1007,7 @@ jobs: uses: 
actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 env: WORKFLOW_NAME: "Dependency Version Updater" - WORKFLOW_DESCRIPTION: "Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and opens PRs to update pinned version constants" + WORKFLOW_DESCRIPTION: "Checks for new releases of gh-aw-firewall, copilot-cli, and gh-aw-mcpg, and syncs ecosystem_domains.json from gh-aw. Opens PRs for any updates found." HAS_PATCH: ${{ needs.agent.outputs.has_patch }} with: script: | @@ -1172,7 +1172,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: 
"{\"create_pull_request\":{\"max\":3,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_path_prefixes\":[\".github/\",\".agents/\"]},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"report_incomplete\":{}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_pull_request\":{\"max\":4,\"max_patch_size\":1024,\"protected_files\":[\"package.json\",\"bun.lockb\",\"bunfig.toml\",\"deno.json\",\"deno.jsonc\",\"deno.lock\",\"global.json\",\"NuGet.Config\",\"Directory.Packages.props\",\"mix.exs\",\"mix.lock\",\"go.mod\",\"go.sum\",\"stack.yaml\",\"stack.yaml.lock\",\"pom.xml\",\"build.gradle\",\"build.gradle.kts\",\"settings.gradle\",\"settings.gradle.kts\",\"gradle.properties\",\"package-lock.json\",\"yarn.lock\",\"pnpm-lock.yaml\",\"npm-shrinkwrap.json\",\"requirements.txt\",\"Pipfile\",\"Pipfile.lock\",\"pyproject.toml\",\"setup.py\",\"setup.cfg\",\"Gemfile\",\"Gemfile.lock\",\"uv.lock\",\"CODEOWNERS\",\"AGENTS.md\"],\"protected_path_prefixes\":[\".github/\",\".agents/\"]},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"report_incomplete\":{}}" GH_AW_CI_TRIGGER_TOKEN: ${{ secrets.GH_AW_CI_TRIGGER_TOKEN }} with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} From 
97b4cf3895e311d190b562c7b2e68d9260264bbd Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 12:55:24 +0100 Subject: [PATCH 08/12] refactor: use else instead of continue in network allow loop Replace continue-after-if with an if/else block for clearer mutually-exclusive branching between ecosystem identifiers and raw domain validation. Also fix remaining stale "network.allowed" references in error messages, comments, and test descriptions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/standalone.rs | 45 +++++++++++++++++++-------------------- src/ecosystem_domains.rs | 2 +- tests/compiler_tests.rs | 20 ++++++++--------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 52f781f..46629b6 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -262,7 +262,7 @@ impl Compiler for StandaloneCompiler { /// `--allow-domains` flag. The list includes: /// 1. Core Azure DevOps/GitHub endpoints /// 2. MCP-specific endpoints for each enabled MCP -/// 3. User-specified additional hosts from network.allowed +/// 3. User-specified additional hosts from network.allow fn generate_allowed_domains( front_matter: &FrontMatter, extensions: &[super::extensions::Extension], @@ -330,7 +330,7 @@ fn generate_allowed_domains( let domains = get_ecosystem_domains(host); if domains.is_empty() && !is_known_ecosystem(host) { eprintln!( - "warning: network.allowed contains unknown ecosystem identifier '{}'. \ + "warning: network.allow contains unknown ecosystem identifier '{}'. \ Known ecosystems: python, rust, node, go, java, etc. \ If this is a domain name, it should contain a dot.", host @@ -339,28 +339,27 @@ fn generate_allowed_domains( for domain in domains { hosts.insert(domain); } - continue; - } - - let valid_chars = !host.is_empty() - && host - .chars() - .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' 
| '-' | '*')); - if !valid_chars { - anyhow::bail!( - "network.allowed domain '{}' contains characters invalid in DNS names. \ - Only ASCII alphanumerics, '.', '-', and '*' are allowed.", - host - ); - } - if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { - anyhow::bail!( - "network.allowed domain '{}' uses '*' in an unsupported position. \ - Wildcards must appear only as a leading prefix (e.g. '*.example.com').", - host - ); + } else { + let valid_chars = !host.is_empty() + && host + .chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); + if !valid_chars { + anyhow::bail!( + "network.allow domain '{}' contains characters invalid in DNS names. \ + Only ASCII alphanumerics, '.', '-', and '*' are allowed.", + host + ); + } + if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { + anyhow::bail!( + "network.allow domain '{}' uses '*' in an unsupported position. \ + Wildcards must appear only as a leading prefix (e.g. '*.example.com').", + host + ); + } + hosts.insert(host.clone()); } - hosts.insert(host.clone()); } // Remove blocked hosts (supports both ecosystem identifiers and raw domains) diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs index d30607c..7666d79 100644 --- a/src/ecosystem_domains.rs +++ b/src/ecosystem_domains.rs @@ -5,7 +5,7 @@ //! identifiers (e.g., `"python"`, `"rust"`, `"node"`) to arrays of domains that //! those ecosystems require for package management, registry access, etc. //! -//! Users reference these identifiers in the `network.allowed` front matter field +//! Users reference these identifiers in the `network.allow` front matter field //! instead of listing individual domains: //! //! 
```yaml diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index db499af..0ca01b7 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2523,7 +2523,7 @@ tools: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allowed with a valid leading wildcard (*.example.com) compiles successfully +/// Test that network.allow with a valid leading wildcard (*.example.com) compiles successfully #[test] fn test_network_allow_valid_wildcard_compiles() { let temp_dir = std::env::temp_dir().join(format!( @@ -2534,7 +2534,7 @@ fn test_network_allow_valid_wildcard_compiles() { let input = r#"--- name: "Network Wildcard Agent" -description: "Agent with valid leading wildcard in network.allowed" +description: "Agent with valid leading wildcard in network.allow" network: allowed: - "*.mycompany.com" @@ -2563,7 +2563,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allowed with a trailing wildcard (example.*) fails compilation +/// Test that network.allow with a trailing wildcard (example.*) fails compilation #[test] fn test_network_allow_trailing_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2574,7 +2574,7 @@ fn test_network_allow_trailing_wildcard_fails() { let input = r#"--- name: "Network Trailing Wildcard Agent" -description: "Agent with trailing wildcard in network.allowed" +description: "Agent with trailing wildcard in network.allow" network: allowed: - "example.*" @@ -2607,7 +2607,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allowed with a mid-string wildcard (ex*ample.com) fails compilation +/// Test that network.allow with a mid-string wildcard (ex*ample.com) fails compilation #[test] fn test_network_allow_mid_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2618,7 +2618,7 @@ fn test_network_allow_mid_wildcard_fails() { let input = r#"--- name: "Network Mid Wildcard Agent" -description: "Agent with mid-string wildcard in 
network.allowed" +description: "Agent with mid-string wildcard in network.allow" network: allowed: - "ex*ample.com" @@ -2651,7 +2651,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allowed with a double wildcard (*.*.com) fails compilation +/// Test that network.allow with a double wildcard (*.*.com) fails compilation #[test] fn test_network_allow_double_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2662,7 +2662,7 @@ fn test_network_allow_double_wildcard_fails() { let input = r#"--- name: "Network Double Wildcard Agent" -description: "Agent with double wildcard in network.allowed" +description: "Agent with double wildcard in network.allow" network: allowed: - "*.*.com" @@ -2695,7 +2695,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allowed with a bare '*' fails compilation +/// Test that network.allow with a bare '*' fails compilation #[test] fn test_network_allow_bare_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2706,7 +2706,7 @@ fn test_network_allow_bare_wildcard_fails() { let input = r#"--- name: "Network Bare Wildcard Agent" -description: "Agent with bare wildcard in network.allowed" +description: "Agent with bare wildcard in network.allow" network: allowed: - "*" From 410da0b0fb8c47c6d86ccae8325770824b368043 Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 13:10:11 +0100 Subject: [PATCH 09/12] test: add schema validation for embedded ecosystem_domains.json Add two tests to ecosystem_domains::tests: - test_embedded_json_parses_as_expected_schema: validates the compile-time-embedded JSON deserializes correctly as HashMap> and every ecosystem has a non-empty domain list. - test_malformed_json_rejected: confirms serde_json rejects schema mismatches (string instead of array, non-string array elements, invalid JSON syntax), validating the safety of the .expect() guard on the LazyLock initializer. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ecosystem_domains.rs | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs index 7666d79..a7f4579 100644 --- a/src/ecosystem_domains.rs +++ b/src/ecosystem_domains.rs @@ -186,4 +186,48 @@ mod tests { // Ubuntu assert!(domains.contains(&"archive.ubuntu.com".to_string())); } + + #[test] + fn test_embedded_json_parses_as_expected_schema() { + // Validates that the compile-time-embedded ecosystem_domains.json + // deserializes into HashMap<String, Vec<String>> without panicking. + let parsed: Result<HashMap<String, Vec<String>>, _> = + serde_json::from_str(ECOSYSTEM_JSON); + assert!( + parsed.is_ok(), + "embedded ecosystem_domains.json failed to parse: {}", + parsed.unwrap_err() + ); + let map = parsed.unwrap(); + assert!(!map.is_empty(), "ecosystem_domains.json should not be empty"); + // Every ecosystem should have a non-empty domain list + for (key, domains) in &map { + assert!( + !domains.is_empty(), + "ecosystem '{}' has an empty domain list", + key + ); + } + } + + #[test] + fn test_malformed_json_rejected() { + // Ensures serde_json correctly rejects JSON that doesn't match + // the expected HashMap<String, Vec<String>> schema, validating + // the safety of the .expect() guard on the LazyLock.
+ let bad_schema = r#"{"python": "not-a-list"}"#; + let result: Result>, _> = + serde_json::from_str(bad_schema); + assert!(result.is_err(), "schema mismatch should produce an error"); + + let bad_json = r#"{"python": [123, true]}"#; + let result: Result>, _> = + serde_json::from_str(bad_json); + assert!(result.is_err(), "non-string array elements should produce an error"); + + let invalid_json = r#"{not valid json"#; + let result: Result>, _> = + serde_json::from_str(invalid_json); + assert!(result.is_err(), "invalid JSON syntax should produce an error"); + } } From 673fe041e4d0d762a09f61673ad99155f43ff97c Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 13:14:25 +0100 Subject: [PATCH 10/12] =?UTF-8?q?feat:=20add=20deny=5Funknown=5Ffields=20t?= =?UTF-8?q?o=20NetworkConfig=20to=20catch=20allow=E2=86=92allowed=20rename?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add #[serde(deny_unknown_fields)] to NetworkConfig so that the old field name (network.allow) produces a compile-time error instead of being silently ignored. This prevents users from losing their AWF domain allowlists after the rename to network.allowed. Also fix stale doc comments referencing the old field name. 
Tests added: - test_network_config_rejects_old_allow_field - test_network_config_accepts_allowed_field - test_network_config_rejects_arbitrary_unknown_field Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/types.rs | 60 +++++++++++++++++++++++++++++++++++++++- src/ecosystem_domains.rs | 2 +- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/compile/types.rs b/src/compile/types.rs index dbbe0ef..86a7aa0 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -594,8 +594,9 @@ fn default_model() -> String { /// The domain allowlist is dynamically generated based on: /// - Core Azure DevOps/GitHub endpoints (always included) /// - MCP-specific endpoints for each enabled MCP -/// - User-specified additional hosts from `allow` field +/// - User-specified additional hosts from `allowed` field #[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] +#[serde(deny_unknown_fields)] pub struct NetworkConfig { /// Additional allowed host patterns (supports wildcards like *.example.com) /// Core Azure DevOps and GitHub hosts are always allowed. 
@@ -1228,4 +1229,61 @@ Body let runtimes = fm.runtimes.as_ref().unwrap(); assert!(runtimes.lean.as_ref().unwrap().is_enabled()); } + + // ─── NetworkConfig deny_unknown_fields ────────────────────────────────── + + #[test] + fn test_network_config_rejects_old_allow_field() { + let content = r#"--- +name: "Test" +description: "Test" +network: + allow: + - "*.mycompany.com" +--- + +Body +"#; + let result = super::super::common::parse_markdown(content); + assert!(result.is_err(), "network.allow (old field name) should be rejected"); + let err = format!("{:#}", result.unwrap_err()); + assert!( + err.contains("unknown field `allow`"), + "error should mention unknown field `allow`, got: {}", + err + ); + } + + #[test] + fn test_network_config_accepts_allowed_field() { + let content = r#"--- +name: "Test" +description: "Test" +network: + allowed: + - "*.mycompany.com" +--- + +Body +"#; + let (fm, _) = super::super::common::parse_markdown(content).unwrap(); + let net = fm.network.unwrap(); + assert_eq!(net.allowed, vec!["*.mycompany.com"]); + assert!(net.blocked.is_empty()); + } + + #[test] + fn test_network_config_rejects_arbitrary_unknown_field() { + let content = r#"--- +name: "Test" +description: "Test" +network: + typo-field: true +--- + +Body +"#; + let result = super::super::common::parse_markdown(content); + assert!(result.is_err(), "unknown fields in network should be rejected"); + } } diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs index a7f4579..c6af164 100644 --- a/src/ecosystem_domains.rs +++ b/src/ecosystem_domains.rs @@ -5,7 +5,7 @@ //! identifiers (e.g., `"python"`, `"rust"`, `"node"`) to arrays of domains that //! those ecosystems require for package management, registry access, etc. //! -//! Users reference these identifiers in the `network.allow` front matter field +//! Users reference these identifiers in the `network.allowed` front matter field //! instead of listing individual domains: //! //! 
```yaml From a715c9450aa09eab6f1c390b92cbc855d84c3c08 Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 13:17:18 +0100 Subject: [PATCH 11/12] fix: update stale network.allow references to network.allowed Rename remaining occurrences of network.allow to network.allowed in: - src/compile/standalone.rs: doc comments and warning/error messages - tests/compiler_tests.rs: doc comments and test description strings Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/standalone.rs | 8 ++++---- tests/compiler_tests.rs | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 46629b6..67475f8 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -262,7 +262,7 @@ impl Compiler for StandaloneCompiler { /// `--allow-domains` flag. The list includes: /// 1. Core Azure DevOps/GitHub endpoints /// 2. MCP-specific endpoints for each enabled MCP -/// 3. User-specified additional hosts from network.allow +/// 3. User-specified additional hosts from network.allowed fn generate_allowed_domains( front_matter: &FrontMatter, extensions: &[super::extensions::Extension], @@ -330,7 +330,7 @@ fn generate_allowed_domains( let domains = get_ecosystem_domains(host); if domains.is_empty() && !is_known_ecosystem(host) { eprintln!( - "warning: network.allow contains unknown ecosystem identifier '{}'. \ + "warning: network.allowed contains unknown ecosystem identifier '{}'. \ Known ecosystems: python, rust, node, go, java, etc. \ If this is a domain name, it should contain a dot.", host @@ -346,14 +346,14 @@ fn generate_allowed_domains( .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); if !valid_chars { anyhow::bail!( - "network.allow domain '{}' contains characters invalid in DNS names. \ + "network.allowed domain '{}' contains characters invalid in DNS names. 
\ Only ASCII alphanumerics, '.', '-', and '*' are allowed.", host ); } if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { anyhow::bail!( - "network.allow domain '{}' uses '*' in an unsupported position. \ + "network.allowed domain '{}' uses '*' in an unsupported position. \ Wildcards must appear only as a leading prefix (e.g. '*.example.com').", host ); diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 0ca01b7..db499af 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2523,7 +2523,7 @@ tools: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a valid leading wildcard (*.example.com) compiles successfully +/// Test that network.allowed with a valid leading wildcard (*.example.com) compiles successfully #[test] fn test_network_allow_valid_wildcard_compiles() { let temp_dir = std::env::temp_dir().join(format!( @@ -2534,7 +2534,7 @@ fn test_network_allow_valid_wildcard_compiles() { let input = r#"--- name: "Network Wildcard Agent" -description: "Agent with valid leading wildcard in network.allow" +description: "Agent with valid leading wildcard in network.allowed" network: allowed: - "*.mycompany.com" @@ -2563,7 +2563,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a trailing wildcard (example.*) fails compilation +/// Test that network.allowed with a trailing wildcard (example.*) fails compilation #[test] fn test_network_allow_trailing_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2574,7 +2574,7 @@ fn test_network_allow_trailing_wildcard_fails() { let input = r#"--- name: "Network Trailing Wildcard Agent" -description: "Agent with trailing wildcard in network.allow" +description: "Agent with trailing wildcard in network.allowed" network: allowed: - "example.*" @@ -2607,7 +2607,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a mid-string wildcard (ex*ample.com) fails 
compilation +/// Test that network.allowed with a mid-string wildcard (ex*ample.com) fails compilation #[test] fn test_network_allow_mid_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2618,7 +2618,7 @@ fn test_network_allow_mid_wildcard_fails() { let input = r#"--- name: "Network Mid Wildcard Agent" -description: "Agent with mid-string wildcard in network.allow" +description: "Agent with mid-string wildcard in network.allowed" network: allowed: - "ex*ample.com" @@ -2651,7 +2651,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a double wildcard (*.*.com) fails compilation +/// Test that network.allowed with a double wildcard (*.*.com) fails compilation #[test] fn test_network_allow_double_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2662,7 +2662,7 @@ fn test_network_allow_double_wildcard_fails() { let input = r#"--- name: "Network Double Wildcard Agent" -description: "Agent with double wildcard in network.allow" +description: "Agent with double wildcard in network.allowed" network: allowed: - "*.*.com" @@ -2695,7 +2695,7 @@ network: let _ = fs::remove_dir_all(&temp_dir); } -/// Test that network.allow with a bare '*' fails compilation +/// Test that network.allowed with a bare '*' fails compilation #[test] fn test_network_allow_bare_wildcard_fails() { let temp_dir = std::env::temp_dir().join(format!( @@ -2706,7 +2706,7 @@ fn test_network_allow_bare_wildcard_fails() { let input = r#"--- name: "Network Bare Wildcard Agent" -description: "Agent with bare wildcard in network.allow" +description: "Agent with bare wildcard in network.allowed" network: allowed: - "*" From e30394de4a7d4f9a5603ca48a3fa636fc90cbfed Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 15 Apr 2026 13:40:10 +0100 Subject: [PATCH 12/12] fix: add safety guards for ecosystem domain expansion - Add warning in generate_allowed_domains() when an extension requires an unknown ecosystem identifier, matching 
the existing guard on the user-host path. - Add depth guard (max 8) to get_ecosystem_domains() to prevent stack overflow from circular compound ecosystem references. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/standalone.rs | 11 ++++++++++- src/ecosystem_domains.rs | 31 ++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 67475f8..cafe7e5 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -313,7 +313,16 @@ fn generate_allowed_domains( for ext in extensions { for host in ext.required_hosts() { if is_ecosystem_identifier(&host) { - for domain in get_ecosystem_domains(&host) { + let domains = get_ecosystem_domains(&host); + if domains.is_empty() { + eprintln!( + "warning: extension '{}' requires unknown ecosystem '{}'; \ + no domains added", + ext.name(), + host + ); + } + for domain in domains { hosts.insert(domain); } } else { diff --git a/src/ecosystem_domains.rs b/src/ecosystem_domains.rs index c6af164..77f6651 100644 --- a/src/ecosystem_domains.rs +++ b/src/ecosystem_domains.rs @@ -45,11 +45,26 @@ static COMPOUND_ECOSYSTEMS: LazyLock<HashMap<String, Vec<String>>> = /// /// Returns an empty `Vec` if the identifier is unknown. pub fn get_ecosystem_domains(identifier: &str) -> Vec<String> { + get_ecosystem_domains_inner(identifier, 0) +} + +/// Recursive inner function with a depth guard to prevent stack overflow +/// from circular compound ecosystem references.
+fn get_ecosystem_domains_inner(identifier: &str, depth: u8) -> Vec<String> { + if depth > 8 { + eprintln!( + "warning: ecosystem expansion exceeded max depth for '{}'; \ + possible cycle in compound ecosystems", + identifier + ); + return vec![]; + } + + // Check compound ecosystems first if let Some(components) = COMPOUND_ECOSYSTEMS.get(identifier) { let mut domains: HashSet<String> = HashSet::new(); for component in components { - for d in get_ecosystem_domains(component) { + for d in get_ecosystem_domains_inner(component, depth + 1) { domains.insert(d); } } @@ -230,4 +245,18 @@ mod tests { serde_json::from_str(invalid_json); assert!(result.is_err(), "invalid JSON syntax should produce an error"); } + + #[test] + fn test_depth_guard_prevents_deep_recursion() { + // get_ecosystem_domains_inner with depth > 8 returns empty + let result = get_ecosystem_domains_inner("python", 9); + assert!(result.is_empty(), "depth > 8 should short-circuit to empty"); + } + + #[test] + fn test_depth_guard_allows_normal_depth() { + // Normal calls (depth 0) should work fine + let result = get_ecosystem_domains_inner("python", 0); + assert!(!result.is_empty(), "depth 0 should return normal results"); + } }