Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Handler entry tests: `cdk/test/handlers/orchestrate-task.test.ts`, `create-task.
- **Editing on `main` directly** — ALWAYS create a worktree with a feature branch for changes, even trivial ones. Main should stay clean; all work flows through worktree → branch → PR → merge.
- **Git worktrees** — Always **`git fetch origin main`** before creating a new worktree to ensure you branch from the latest remote state. `node_modules/` and `agent/.venv/` are per-tree (not shared). Run **`mise run install`** in each new worktree before building. All CDK path references (`__dirname`-relative) and mise `config_roots` resolve correctly without extra setup.
- **Bumping Cedar engines in isolation** — `cedarpy` (Python, `agent/pyproject.toml`) and `@cedar-policy/cedar-wasm` (TypeScript, `cdk/package.json`) are two language bindings over the same Cedar Rust core. They MUST move together; even patch-version drift between bindings can yield divergent `(decision, matching_rule_ids)` on the same `(policy, input)` — invisible to per-side unit tests, caught (only) by `contracts/cedar-parity/` golden fixtures in CI. If you bump one engine you MUST bump the other to a tested-compatible version AND refresh the parity fixtures in the same commit. Both pins are EXACT (no `^`/`~`). See `docs/design/CEDAR_HITL_GATES.md` §15.6 (decision #23) and the parity-contract banner in `mise.toml`. **DO NOT** accept upstream's "Update branch" or auto-merge suggestions on cedarpy without verifying parity with cedar-wasm.
- **Dropping outbound SDK solution attribution on a new AWS client (#319)** — every outbound AWS API call carries two `User-Agent` segments: `app/uksb-wt64nei4u6#{stack}` and `md/uksb-wt64nei4u6#{component}`. The `app/` segment is **SDK-native** — it comes from the `AWS_SDK_UA_APP_ID` env var (CDK sets it on every Lambda via `SolutionUaAspect`, plus the AgentCore runtime and ECS container), so new clients get it for free as long as they run on a surface where CDK threads that env. The `md/` segment is the per-surface label and must be carried explicitly: in `agent/src/` build clients via `aws_session.tenant_client`/`tenant_resource` (tenant data) or `aws_session.platform_client` (ambient-chain calls) — never naked `boto3.client(...)`; in `cdk/src/handlers/` spread `...abcaUserAgent()` from `shared/ua.ts` into the client constructor; in `cli/src/` spread `...abcaUserAgent()` from `cli/src/ua.ts`. The three `ua` modules (`agent/src/ua.py`, `cdk/src/handlers/shared/ua.ts`, `cli/src/ua.ts`) MUST stay identical in solution id, wire format, and sanitization. (Customer opt-out: `-c sdkUaAppId=''` at deploy, or export `AWS_SDK_UA_APP_ID=''` for the CLI.)

### Tech stack

Expand Down
67 changes: 56 additions & 11 deletions agent/src/aws_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def _build_scoped_session(role_arn: str) -> Any:
)
from botocore.session import get_session as get_botocore_session

import ua

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
task_id = _tags.get("task_id", "")
# Role session name must be <=64 chars and match [\w+=,.@-]. task_id is a
Expand All @@ -138,8 +140,9 @@ def _build_scoped_session(role_arn: str) -> Any:

# A dedicated STS client built from the *ambient* (compute-role) chain.
# This is the role-chaining caller; the assumed SessionRole credentials it
# returns must NOT be used to build it, or refresh would recurse.
sts_client = boto3.client("sts", region_name=region)
# returns must NOT be used to build it, or refresh would recurse. Carries
# the static md/ UA segment so the assume-role call is attributed too.
sts_client = boto3.client("sts", region_name=region, config=ua.client_config())

def _refresh() -> dict[str, str]:
resp = sts_client.assume_role(
Expand All @@ -158,6 +161,10 @@ def _refresh() -> dict[str, str]:
}

botocore_session = get_botocore_session()
# Static md/ solution-attribution segment at the session level: it
# propagates to every client AND resource derived from this session, so
# all tenant-data calls carry it. (#319)
botocore_session.user_agent_extra = ua.static_user_agent_extra()
# Deferred: the first assume_role happens on first credential use, not now,
# so a transient STS hiccup at startup doesn't crash the agent before it
# has even begun.
Expand Down Expand Up @@ -209,10 +216,19 @@ def get_session() -> Any:
) from exc
else:
# Scoping not requested (local/dev/tests, or pre-provisioning):
# plain ambient session, behaviorally identical to pre-feature code.
_session = boto3.Session(
region_name=os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
)
# plain ambient session. Built from an explicit botocore session so
# the static md/ solution-attribution segment rides every derived
# client/resource (propagation requires the botocore session). (#319)
from botocore.session import get_session as get_botocore_session

import ua

botocore_session = get_botocore_session()
botocore_session.user_agent_extra = ua.static_user_agent_extra()
region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
if region:
botocore_session.set_config_variable("region", region)
_session = boto3.Session(botocore_session=botocore_session)
_scoped = False
return _session

Expand All @@ -224,20 +240,35 @@ def is_scoped() -> bool:
return bool(_scoped)


def _merge_ua_config(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Return ``kwargs`` with the static md/ UA merged into any ``config``.

    Preserves a caller-supplied ``botocore.config.Config`` by merging rather
    than overwriting; supplies one carrying just the UA otherwise. (#319)

    ``Config.merge`` lets the *argument's* options win, so merging a config
    that only sets ``user_agent_extra`` would silently replace a
    caller-supplied ``user_agent_extra``. To keep the caller's value, the md/
    segment is appended to it instead (and not repeated when the caller's
    value already contains it).

    Args:
        kwargs: Keyword arguments destined for ``boto3.client`` /
            ``boto3.resource``. Updated in place.

    Returns:
        The same ``kwargs`` dict, with ``config`` carrying the md/ segment.
    """
    import ua

    existing = kwargs.get("config")
    if existing is None:
        # No caller config: a Config carrying only the UA segment suffices.
        kwargs["config"] = ua.client_config()
        return kwargs

    caller_extra = getattr(existing, "user_agent_extra", None)
    if not caller_extra:
        # Caller set other options but no UA extra: a plain merge is lossless.
        kwargs["config"] = existing.merge(ua.client_config())
        return kwargs

    segment = ua.static_user_agent_extra()
    if segment not in caller_extra.split():
        from botocore.config import Config

        # Append rather than replace so the caller's own UA extra survives.
        combined = Config(user_agent_extra=f"{caller_extra} {segment}")
        kwargs["config"] = existing.merge(combined)
    # Otherwise the caller's config already carries the segment; keep it as is.
    return kwargs


def tenant_client(service_name: str, **kwargs: Any) -> Any:
    """boto3 client for tenant data.

    When the per-task SessionRole is configured, the client is built from the
    tag-scoped, refreshable session (which already carries the static md/ UA at
    the session level). Otherwise it delegates directly to ``boto3.client`` —
    behaviorally identical to the pre-feature code path (transparent to
    callers/tests that mock ``boto3.client``) but with the md/ UA merged in.

    Args:
        service_name: AWS service identifier passed through to boto3.
        **kwargs: Extra client arguments, forwarded unchanged on the scoped
            path and passed through ``_merge_ua_config`` on the unscoped path.

    Returns:
        The boto3 client for ``service_name``.
    """
    # get_session() must run first: it decides (and records) whether the
    # session is scoped, which is_scoped() then reports.
    session = get_session()
    if is_scoped():
        return session.client(service_name, **kwargs)
    import boto3

    return boto3.client(service_name, **_merge_ua_config(kwargs))


def tenant_resource(service_name: str, **kwargs: Any) -> Any:
Expand All @@ -247,4 +278,18 @@ def tenant_resource(service_name: str, **kwargs: Any) -> Any:
return session.resource(service_name, **kwargs)
import boto3

return boto3.resource(service_name, **kwargs)
return boto3.resource(service_name, **_merge_ua_config(kwargs))


def platform_client(service_name: str, **kwargs: Any) -> Any:
    """Build a boto3 client for **platform** (non-tenant) calls.

    Intended for call sites that deliberately bypass the scoped session
    (CloudWatch Logs, Secrets Manager, bedrock-agentcore): those talk to
    platform resources rather than tenant data, so the client is created with
    plain ``boto3.client`` on the ambient credential chain. The static md/
    solution-attribution segment is still attached by routing ``kwargs``
    through ``_merge_ua_config``, which merges it into any caller-supplied
    ``config``. (#319)
    """
    import boto3

    client_kwargs = _merge_ua_config(kwargs)
    return boto3.client(service_name, **client_kwargs)
9 changes: 5 additions & 4 deletions agent/src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def resolve_github_token() -> str:
return cached
secret_arn = os.environ.get("GITHUB_TOKEN_SECRET_ARN")
if secret_arn:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("secretsmanager", region_name=region)
client = platform_client("secretsmanager", region_name=region)
resp = client.get_secret_value(SecretId=secret_arn)
token = resp["SecretString"]
# Cache in env so downstream tools (git, gh CLI) work unchanged
Expand Down Expand Up @@ -101,14 +101,15 @@ def resolve_linear_api_token(channel_metadata: dict[str, str] | None = None) ->
import json
from datetime import datetime, timedelta

import boto3
from botocore.exceptions import BotoCoreError, ClientError
except ImportError as e:
log("WARN", f"resolve_linear_api_token: boto3 unavailable ({e}); skipping")
# nosemgrep: py-silent-success-masking -- optional Linear MCP; boto3 unavailable
return ""

sm = boto3.client("secretsmanager", region_name=region)
from aws_session import platform_client

sm = platform_client("secretsmanager", region_name=region)

def _fetch_token() -> dict | None:
"""Fetch + parse the per-workspace OAuth secret.
Expand Down
4 changes: 2 additions & 2 deletions agent/src/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def _get_client():
global _client
if _client is not None:
return _client
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
if not region:
raise ValueError("AWS_REGION or AWS_DEFAULT_REGION must be set for memory operations")
_client = boto3.client("bedrock-agentcore", region_name=region)
_client = platform_client("bedrock-agentcore", region_name=region)
return _client


Expand Down
8 changes: 4 additions & 4 deletions agent/src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,10 @@ def _warn_cw_write_blocking(log_group: str, task_id: str | None, stamped: str) -
covers both writers.
"""
try:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)

stream = f"server_warn/{task_id or 'server'}"
with _ctx_for_debug.suppress(client.exceptions.ResourceAlreadyExistsException):
Expand All @@ -193,10 +193,10 @@ def _warn_cw_write_blocking(log_group: str, task_id: str | None, stamped: str) -
def _debug_cw_write_blocking(log_group: str, task_id: str | None, stamped: str) -> None:
"""Blocking CloudWatch write — only called from a background thread."""
try:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)

stream = f"server_debug/{task_id or 'server'}"
with _ctx_for_debug.suppress(client.exceptions.ResourceAlreadyExistsException):
Expand Down
4 changes: 2 additions & 2 deletions agent/src/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ def _log_error_cw_blocking(log_group: str, task_id: str | None, stamped: str) ->
fire on the absence of the expected stream, not on this helper).
"""
try:
import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)
stream = f"agent_error/{task_id or 'unknown'}"
with contextlib.suppress(client.exceptions.ResourceAlreadyExistsException):
client.create_log_stream(logGroupName=log_group, logStreamName=stream)
Expand Down
8 changes: 4 additions & 4 deletions agent/src/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ def _emit_metrics_to_cloudwatch(json_payload: dict) -> None:
try:
import contextlib

import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
client = boto3.client("logs", region_name=region)
client = platform_client("logs", region_name=region)

task_id = json_payload.get("task_id", "unknown")
log_stream = f"metrics/{task_id}"
Expand Down Expand Up @@ -164,10 +164,10 @@ def _ensure_client(self):

import contextlib

import boto3
from aws_session import platform_client

region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
self._client = boto3.client("logs", region_name=region)
self._client = platform_client("logs", region_name=region)

log_stream = f"trajectory/{self._task_id}"
with contextlib.suppress(self._client.exceptions.ResourceAlreadyExistsException):
Expand Down
76 changes: 76 additions & 0 deletions agent/src/ua.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Outbound AWS SDK User-Agent solution attribution (#319).

Every AWS API call made by the agent carries two ABCA solution-attribution
segments in the ``User-Agent`` header:

app/uksb-wt64nei4u6#{STACKNAME} <- native AWS_SDK_UA_APP_ID env (no code here)
md/uksb-wt64nei4u6#agent <- static, baked once at construction

**The ``app/`` segment is emitted by the SDK itself.** Both botocore and the
JS v3 SDK read the ``AWS_SDK_UA_APP_ID`` environment variable natively and
render it as ``app/{value}`` (botocore ``configprovider.py`` maps it to the
``user_agent_appid`` config; the value charset *includes* ``#``, so the
``uksb-wt64nei4u6#{stack}`` form survives verbatim). CDK sets that env var on
every Lambda / AgentCore runtime / ECS container, so this module contributes
**nothing** to ``app/`` — and a customer can suppress it by setting the env
var to the empty string. (This is the key simplification over the original
``/``-separated design, which had to bypass the native field because ``/`` is
not a legal app-id character. Using ``#`` keeps it native.)

This module owns only the **static ``md/`` segment** — a stable
per-component label baked once via ``user_agent_extra`` at session/client
construction. There is intentionally no per-request trace handle and no
event/middleware machinery: connection pools are never re-pinned, and
request correlation is owned by X-Ray / structured-log request ids (#245),
not the User-Agent.

The TypeScript counterparts are ``cdk/src/handlers/shared/ua.ts`` and
``cli/src/ua.ts`` — the solution id, wire format, and sanitization rules
must stay identical across all three.
"""

from __future__ import annotations

import string
from typing import Any

# AWS solution-attribution id for ABCA; forms the part of the ``md/``
# segment before the ``#`` (see ``static_user_agent_extra``). Also appears
# (deploy-time counterpart, #292) in the CloudFormation stack description in
# ``cdk/src/main.ts`` and in the TS mirrors of this module. Per-surface
# literal by design.
SOLUTION_ID: str = "uksb-wt64nei4u6"

# Stable per-component label: this surface IS the Python agent runtime.
# Forms the part of the ``md/`` segment after the ``#``, after being passed
# through ``sanitize_ua_value``.
COMPONENT: str = "agent"

# Characters allowed in a UA value: the RFC 7230 token alphabet (the UA
# product-token charset) minus '#'. '#' is left out on purpose — it is the
# attribution scheme's structural separator, so a hostile component/label
# value cannot inject extra segments.
_ALLOWED = (
    frozenset(string.ascii_letters)
    | frozenset(string.digits)
    | frozenset("!$%&'*+-.^_`|~")
)


def sanitize_ua_value(raw: str) -> str:
    """Return ``raw`` with every char outside the UA-token set (non-ASCII too) turned into ``-``."""
    cleaned = ["-" if ch not in _ALLOWED else ch for ch in raw]
    return "".join(cleaned)


def static_user_agent_extra() -> str:
    """Build the static ``md/`` segment used at client/session construction.

    The result has the form ``md/{SOLUTION_ID}#{COMPONENT}``, with the
    component label passed through ``sanitize_ua_value`` first. The ``app/``
    segment is not produced here: the SDK contributes it separately from
    ``AWS_SDK_UA_APP_ID``.
    """
    component_label = sanitize_ua_value(COMPONENT)
    return f"md/{SOLUTION_ID}#{component_label}"


def client_config() -> Any:
    """Return a ``botocore.config.Config`` whose only option is the static ``md/`` segment.

    Meant for direct ``boto3.client(...)`` call sites that are not built from
    a shared session (see ``aws_session.platform_client``). Callers that
    already hold a ``Config`` should combine the two with ``.merge(...)``.
    """
    from botocore.config import Config

    md_segment = static_user_agent_extra()
    return Config(user_agent_extra=md_segment)
55 changes: 55 additions & 0 deletions agent/tests/test_aws_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,58 @@ def test_overlong_value_truncated_to_256(self, monkeypatch):
assert len(tags["repo"]) == _MAX_TAG_VALUE_LEN == 256
# Untruncated values are passed through unchanged.
assert tags["user_id"] == "u-1"


class TestSolutionUserAgent:
    """Checks that the static md/ solution-attribution segment (#319) reaches every client."""

    # The exact md/ segment each path is expected to attach.
    _EXPECTED_MD = "md/uksb-wt64nei4u6#agent"

    def test_platform_client_carries_md_segment(self, monkeypatch):
        monkeypatch.setenv("AWS_REGION", "us-east-1")
        from aws_session import platform_client

        logs_stub = MagicMock(name="logs")
        with patch("boto3.client", return_value=logs_stub) as client_factory:
            platform_client("logs", region_name="us-east-1")

        passed_config = client_factory.call_args.kwargs["config"]
        assert passed_config.user_agent_extra == self._EXPECTED_MD

    def test_unscoped_tenant_client_carries_md_segment(self, monkeypatch):
        # Without SESSION_ROLE_ARN the unscoped path hands off to boto3.client.
        monkeypatch.setenv("AWS_REGION", "us-east-1")
        from aws_session import tenant_client

        ddb_stub = MagicMock(name="ddb")
        with patch("boto3.client", return_value=ddb_stub) as client_factory:
            tenant_client("dynamodb")

        passed_config = client_factory.call_args.kwargs["config"]
        assert passed_config.user_agent_extra == self._EXPECTED_MD

    def test_caller_config_is_merged_not_overwritten(self, monkeypatch):
        from botocore.config import Config

        monkeypatch.setenv("AWS_REGION", "us-east-1")
        from aws_session import platform_client

        caller_config = Config(read_timeout=7)
        with patch("boto3.client", return_value=MagicMock()) as client_factory:
            platform_client("logs", config=caller_config)

        merged = client_factory.call_args.kwargs["config"]
        # The caller's own option and the md/ segment must both be present.
        assert merged.read_timeout == 7
        assert merged.user_agent_extra == self._EXPECTED_MD

    def test_scoped_session_sets_session_level_extra(self, monkeypatch):
        monkeypatch.setenv("AWS_REGION", "us-east-1")
        monkeypatch.setenv(SESSION_ROLE_ARN_ENV, "arn:aws:iam::111122223333:role/abca-session")
        configure_session(user_id="u-1", repo="owner/repo", task_id="t-abc")

        botocore_stub = MagicMock(name="botocore-session")
        sts_patch = patch("boto3.client", return_value=MagicMock(name="sts"))
        session_patch = patch("boto3.Session", return_value=MagicMock(name="boto3-session"))
        creds_patch = patch("botocore.credentials.DeferredRefreshableCredentials")
        botocore_patch = patch("botocore.session.get_session", return_value=botocore_stub)
        with sts_patch, session_patch, creds_patch, botocore_patch:
            get_session()

        assert botocore_stub.user_agent_extra == self._EXPECTED_MD
Loading
Loading