diff --git a/README.md b/README.md index 8acf30cf..3ca80c65 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Enforce runtime guardrails through a centralized control layer—configure once ## Quick Start -Prerequisites: Docker and Python 3.12+. +Prerequisites: Docker (or Podman, see [Podman setup](#podman-setup)) and Python 3.12+. Quick start flow: @@ -292,6 +292,51 @@ Explore working examples for popular frameworks. - [AWS Strands](examples/strands_agents/) - protect Strands workflows and tool calls - [Google ADK Decorator](examples/google_adk_decorator/) - add controls with `@control()` +## Podman Setup + +If Docker Desktop is not available, you can use [Podman](https://podman-desktop.io) as a drop-in replacement. No changes to repo files are needed — the setup below makes `docker` and `docker compose` transparently resolve to Podman. + +**One-time setup** (the commands below assume macOS with Homebrew and zsh; adapt the package manager, shebang, and shell profile for other environments): + +1. Install [Podman Desktop](https://podman-desktop.io) and create a machine from its UI (start it before continuing). + +2. Install `podman-compose`: + +```bash +brew install podman-compose +``` + +3. Create a `docker` shim that routes `docker compose` to `podman-compose` and everything else to `podman`: + +```bash +mkdir -p ~/.local/bin +cat > ~/.local/bin/docker << 'EOF' +#!/bin/zsh +if [[ "$1" == "compose" ]]; then + shift + exec podman-compose "$@" +fi +exec podman "$@" +EOF +chmod +x ~/.local/bin/docker +``` + +4. Add `~/.local/bin` early in your PATH (if it is not there already): + +```bash +echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc +source ~/.zshrc +``` + +**Verify:** + +```bash +docker ps +docker compose version +``` + +After this, all existing `docker`/`docker compose` commands and `make` targets work as-is. 
+ ## How It Works ![Agent Control Architecture](docs/images/Architecture.png) diff --git a/evaluators/contrib/galileo/README.md b/evaluators/contrib/galileo/README.md index f8461f2a..038da67a 100644 --- a/evaluators/contrib/galileo/README.md +++ b/evaluators/contrib/galileo/README.md @@ -6,9 +6,11 @@ Integration package for Galileo Luna evaluator. The `galileo.luna2` evaluator ID has been removed. Existing controls that use `galileo.luna2` should migrate to `galileo.luna` and update their evaluator -configuration to the direct Luna scorer fields (`scorer_label`, `scorer_id`, or -`scorer_version_id`, plus `threshold` and `operator`). If you still need the -legacy Luna2 evaluator, pin `agent-control-evaluator-galileo <8`. +configuration to use the direct Luna scorer fields. `scorer_id` is required; +`scorer_label` and `scorer_version_id` are optional. The evaluator calls +runners-api at `/api/v1/scorers/invoke` under the base URL set in `GALILEO_RUNNERS_API_URL` and authenticates with `GALILEO_API_SECRET_KEY` (or `GALILEO_API_SECRET`); both settings are required. Also set `threshold` and `operator` +as needed. If you still need the legacy Luna2 evaluator, pin +`agent-control-evaluator-galileo <8`. 
## Install diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 8c971cba..239b6126 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -1,19 +1,16 @@ -"""Direct HTTP client for Galileo Luna scorer invocation.""" +"""Direct HTTP client for Galileo Luna scorer invocation via runners-api.""" from __future__ import annotations import logging import os import ssl -import warnings from asyncio import Lock from base64 import urlsafe_b64encode from hashlib import sha256 from hmac import new as hmac_new from json import dumps from time import time -from typing import Literal -from urllib.parse import urlsplit, urlunsplit import httpx from agent_control_models import JSONObject, JSONValue @@ -23,6 +20,14 @@ DEFAULT_TIMEOUT_SECS = 10.0 DEFAULT_INTERNAL_TOKEN_TTL_SECS = 3600 +RUNNERS_SCORER_INVOKE_PATH = "/api/v1/scorers/invoke" +RUNNERS_API_URL_ENV = "GALILEO_RUNNERS_API_URL" +RUNNERS_API_CA_FILE_ENV = "GALILEO_RUNNERS_API_CA_FILE" +AUTH_UPSTREAM_CA_FILE_ENV = "AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE" + +# Headers that must never be forwarded to runners-api (checked case-insensitively). +_BLOCKED_REQUEST_HEADERS = frozenset({"galileo-api-key"}) + # Keep pooled-connection reuse shorter than typical server keepalive/worker # recycle windows so requests do not pick up sockets the server already closed. 
DEFAULT_KEEPALIVE_EXPIRY_SECS = 1.0 @@ -33,9 +38,6 @@ LUNA_MAX_CONNECTIONS_ENV = "GALILEO_LUNA_MAX_CONNECTIONS" LUNA_MAX_KEEPALIVE_CONNECTIONS_ENV = "GALILEO_LUNA_MAX_KEEPALIVE_CONNECTIONS" LUNA_CLIENT_POOL_SIZE_ENV = "GALILEO_LUNA_CLIENT_POOL_SIZE" -PUBLIC_SCORER_INVOKE_PATH = "/scorers/invoke" -INTERNAL_SCORER_INVOKE_PATH = "/internal/scorers/invoke" -AuthMode = Literal["public", "internal"] def _b64url(data: bytes) -> str: @@ -46,7 +48,7 @@ def _internal_auth_token( api_secret: str, ttl_seconds: int = DEFAULT_INTERNAL_TOKEN_TTL_SECS, ) -> str: - """Create the internal JWT expected by Galileo API internal routes.""" + """Create the internal JWT expected by runners-api scorer invoke routes.""" now = int(time()) header = {"alg": "HS256", "typ": "JWT"} payload = { @@ -65,25 +67,6 @@ def _internal_auth_token( return f"{signing_input}.{_b64url(signature)}" -def _env_auth_mode() -> AuthMode | None: - value = os.getenv("GALILEO_LUNA_AUTH_MODE") - if value is None or value.strip() == "": - return None - deprecation_message = ( - "GALILEO_LUNA_AUTH_MODE is deprecated. Configure exactly one credential " - "(GALILEO_API_KEY for public auth, GALILEO_API_SECRET_KEY for internal " - "auth) or pass auth_mode to GalileoLunaClient." 
- ) - warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2) - logger.warning(deprecation_message) - normalized = value.strip().lower() - if normalized == "public": - return "public" - if normalized == "internal": - return "internal" - raise ValueError("GALILEO_LUNA_AUTH_MODE must be either 'public' or 'internal'.") - - def _load_float_env(env_name: str, default: float) -> float: raw = os.getenv(env_name) if raw is None or raw.strip() == "": @@ -156,7 +139,7 @@ def _has_value(value: JSONValue) -> bool: class ScorerInvokeInputs(BaseModel): - """Input values sent to Galileo's scorer invoke API.""" + """Input values sent to the runners-api scorer invoke endpoint.""" query: JSONValue = "" response: JSONValue = "" @@ -165,39 +148,35 @@ class ScorerInvokeInputs(BaseModel): class ScorerInvokeRequest(BaseModel): - """Request payload for Galileo Luna scorer invocation. + """Request payload for runners-api scorer invocation. Attributes: + scorer_id: Required scorer identifier. + scorer_version_id: Optional pinned scorer version identifier. + scorer_label: Optional display/metadata label. inputs: Selected scorer input values. - scorer_label: Preset, registered, or fine-tuned scorer label. - scorer_id: Optional Galileo scorer identifier. - scorer_version_id: Optional Galileo scorer version identifier. - config: Optional scorer-specific configuration. + config: Scorer-specific configuration, always emitted. 
""" - inputs: ScorerInvokeInputs - scorer_label: str | None = Field(default=None, min_length=1) - scorer_id: str | None = Field(default=None, min_length=1) + scorer_id: str = Field(min_length=1) scorer_version_id: str | None = Field(default=None, min_length=1) - config: JSONObject | None = None + scorer_label: str | None = Field(default=None, min_length=1) + inputs: ScorerInvokeInputs + config: JSONObject = Field(default_factory=dict) @model_validator(mode="after") def ensure_required_values(self) -> ScorerInvokeRequest: - if not (self.scorer_label or self.scorer_id or self.scorer_version_id): - raise ValueError( - "One of scorer_label, scorer_id, or scorer_version_id must be set." - ) if not (_has_value(self.inputs.query) or _has_value(self.inputs.response)): raise ValueError("Either inputs.query or inputs.response must be set.") return self def to_dict(self) -> JSONObject: - """Convert to the Galileo scorer invoke API request shape.""" + """Convert to the runners-api scorer invoke request shape.""" return self.model_dump(mode="json", exclude_none=True) class ScorerInvokeResponse(BaseModel): - """Response from Galileo Luna scorer invocation. + """Response from runners-api scorer invocation. Attributes: scorer_label: Echoed scorer label, when returned. @@ -220,7 +199,7 @@ def raw_response(self) -> JSONObject: @classmethod def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: - """Create a response model from the API JSON object.""" + """Create a response model from the runners-api JSON object.""" response = cls.model_validate( data | {"execution_time": _as_float_or_none(data.get("execution_time"))} ) @@ -229,70 +208,66 @@ def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: class GalileoLunaClient: - """Thin HTTP client for Galileo Luna direct scorer invocation. + """Thin HTTP client for Galileo Luna scorer invocation via runners-api. Environment Variables: - GALILEO_API_SECRET_KEY: Deployment-provided Galileo API internal JWT signing secret. 
- GALILEO_API_KEY: Galileo API key fallback for public scorer invocation. - GALILEO_LUNA_API_URL: Galileo Luna scorer invoke API URL override. - GALILEO_API_URL: Galileo API URL fallback. - GALILEO_LUNA_CA_FILE: CA bundle used to verify the scorer API endpoint, for - deployments whose API serves an internally-issued TLS certificate. + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET: JWT signing secret for runners-api auth. + GALILEO_RUNNERS_API_URL: runners-api base URL (required). + GALILEO_RUNNERS_API_CA_FILE: CA bundle used to verify runners-api TLS. + AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE: Shared internal CA fallback. GALILEO_LUNA_KEEPALIVE_EXPIRY_SECONDS: HTTP pooled connection expiry. GALILEO_LUNA_MAX_CONNECTIONS: Maximum outbound HTTP connections. GALILEO_LUNA_MAX_KEEPALIVE_CONNECTIONS: Maximum idle pooled HTTP connections. GALILEO_LUNA_CLIENT_POOL_SIZE: Number of outbound HTTP clients to rotate across. - GALILEO_CONSOLE_URL: Galileo Console URL (optional, defaults to production). """ def __init__( self, - api_key: str | None = None, api_secret: str | None = None, - console_url: str | None = None, - api_url: str | None = None, - auth_mode: AuthMode | None = None, - ca_file: str | None = None, + runners_api_url: str | None = None, + runners_api_ca_file: str | None = None, ) -> None: """Initialize the Galileo Luna client. Args: - api_key: Galileo API key. If not provided, reads from GALILEO_API_KEY. - api_secret: Deployment-provided Galileo API secret for internal JWT auth. - If not provided, reads from GALILEO_API_SECRET_KEY. - console_url: Galileo Console URL. If not provided, reads from - GALILEO_CONSOLE_URL or uses the production console URL. - api_url: Galileo API URL. If not provided, reads from GALILEO_LUNA_API_URL, - then GALILEO_API_URL, before deriving from the console URL. - auth_mode: Auth mode to use. If not provided, inferred from the single - available credential. - ca_file: CA bundle path used to verify the scorer API endpoint. 
If not - provided, reads from GALILEO_LUNA_CA_FILE. Leave unset for endpoints - with publicly-trusted certificates. + api_secret: Internal JWT signing secret. If not provided, reads from + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET. + runners_api_url: runners-api base URL. If not provided, reads from + GALILEO_RUNNERS_API_URL. + runners_api_ca_file: Optional CA bundle used to verify runners-api TLS. If not + provided, reads from GALILEO_RUNNERS_API_CA_FILE, then + AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE. Raises: - ValueError: If credentials are missing, ambiguous, or incompatible with - the selected auth mode, or if the CA bundle cannot be loaded. + ValueError: If the API secret, runners-api URL, CA bundle, or connection + tuning configuration is invalid. """ resolved_api_secret = ( api_secret or os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") ) - resolved_api_key = api_key or os.getenv("GALILEO_API_KEY") - resolved_auth_mode = self._resolve_auth_mode( - auth_mode or _env_auth_mode(), - api_key=resolved_api_key, - api_secret=resolved_api_secret, - ) + if not resolved_api_secret: + raise ValueError( + "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required for Luna " + "runners-api invocation. Set one as an environment variable or pass it " + "to the constructor." + ) + + resolved_runners_url = runners_api_url or os.getenv(RUNNERS_API_URL_ENV) + if resolved_runners_url is None or resolved_runners_url.strip() == "": + raise ValueError( + "GALILEO_RUNNERS_API_URL is required for Luna runners-api invocation. " + "Set it as an environment variable or pass it to the constructor." 
+ ) - self.api_key = resolved_api_key self.api_secret = resolved_api_secret - self.auth_mode = resolved_auth_mode - self.console_url = ( - console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" - ) - self.api_base = self._resolve_api_base(api_url) - self.ca_file = (ca_file or os.getenv("GALILEO_LUNA_CA_FILE") or "").strip() or None - self._ssl_context = self._load_ssl_context(self.ca_file) + self.runners_api_base = resolved_runners_url.strip().rstrip("/") + self.runners_api_ca_file = ( + runners_api_ca_file + or os.getenv(RUNNERS_API_CA_FILE_ENV) + or os.getenv(AUTH_UPSTREAM_CA_FILE_ENV) + or "" + ).strip() or None + self._ssl_context = self._load_ssl_context(self.runners_api_ca_file) self.keepalive_expiry_seconds = _load_float_env( LUNA_KEEPALIVE_EXPIRY_ENV, DEFAULT_KEEPALIVE_EXPIRY_SECS ) @@ -300,9 +275,7 @@ def __init__( self.max_keepalive_connections = _load_int_env( LUNA_MAX_KEEPALIVE_CONNECTIONS_ENV, DEFAULT_MAX_KEEPALIVE_CONNECTIONS ) - self.client_pool_size = _load_int_env( - LUNA_CLIENT_POOL_SIZE_ENV, DEFAULT_CLIENT_POOL_SIZE - ) + self.client_pool_size = _load_int_env(LUNA_CLIENT_POOL_SIZE_ENV, DEFAULT_CLIENT_POOL_SIZE) _validate_connection_config( keepalive_expiry_seconds=self.keepalive_expiry_seconds, max_connections=self.max_connections, @@ -313,17 +286,6 @@ def __init__( self._clients: list[httpx.AsyncClient] = [] self._next_client_index = 0 self._client_lock = Lock() - logger.info("[GalileoLunaClient] Auth mode selected: %s", self.auth_mode) - - def _resolve_api_base(self, api_url: str | None) -> str: - """Resolve the scorer invoke API base URL from explicit and environment config.""" - candidates = [api_url, os.getenv("GALILEO_LUNA_API_URL")] - candidates.append(os.getenv("GALILEO_API_URL")) - - for candidate in candidates: - if candidate and candidate.strip(): - return candidate.strip().rstrip("/") - return self._derive_api_url(self.console_url) @staticmethod def _load_ssl_context(ca_file: str | None) -> ssl.SSLContext | 
None: @@ -335,74 +297,11 @@ def _load_ssl_context(ca_file: str | None) -> ssl.SSLContext | None: except (OSError, ssl.SSLError) as exc: raise ValueError(f"Failed to load CA bundle from {ca_file!r}: {exc}") from exc - @staticmethod - def _resolve_auth_mode( - auth_mode: AuthMode | None, - *, - api_key: str | None, - api_secret: str | None, - ) -> AuthMode: - if auth_mode == "public": - if not api_key: - raise ValueError("GALILEO_API_KEY is required for public Luna auth.") - return "public" - - if auth_mode == "internal": - if not api_secret: - raise ValueError( - "GALILEO_API_SECRET_KEY is required for internal Luna auth." - ) - return "internal" - - if api_key and api_secret: - raise ValueError( - "Both a Galileo API key and a Galileo API secret are configured. " - "Unset one credential so the auth mode can be inferred, or pass " - "auth_mode='public' or auth_mode='internal' explicitly." - ) - if api_secret: - return "internal" - if api_key: - return "public" - raise ValueError( - "GALILEO_API_SECRET_KEY or GALILEO_API_KEY is required. " - "Set one as an environment variable or pass it to the constructor." - ) - - def _derive_api_url(self, console_url: str) -> str: - """Derive the API URL from a Galileo Console URL. - - Galileo Console hostnames use ``console.`` or ``console-`` prefixes for - canonical environments. For other HTTP(S) hosts, preserve the existing - fallback behavior of prefixing the hostname with ``api.``. - """ - url = console_url.rstrip("/") - parts = urlsplit(url) - host = parts.hostname or "" - - if host.startswith("console."): - new_host = "api." 
+ host[len("console."):] - elif host.startswith("console-"): - new_host = "api-" + host[len("console-"):] - elif parts.scheme in {"http", "https"} and host: - new_host = f"api.{host}" - else: - return url - - return urlunsplit( - parts._replace(netloc=parts.netloc.replace(host, new_host, 1)) - ) - def _create_client(self) -> httpx.AsyncClient: - """Create an HTTP client with the configured auth, TLS, and connection limits.""" - headers = {"Content-Type": "application/json"} - if self.auth_mode == "public" and self.api_key is not None: - headers["Galileo-API-Key"] = self.api_key - verify: ssl.SSLContext | bool = ( - self._ssl_context if self._ssl_context is not None else True - ) + """Create an HTTP client with the configured TLS and connection limits.""" + verify: ssl.SSLContext | bool = self._ssl_context if self._ssl_context is not None else True return httpx.AsyncClient( - headers=headers, + headers={"Content-Type": "application/json"}, timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECS), limits=httpx.Limits( max_connections=self.max_connections, @@ -436,37 +335,29 @@ async def _get_client(self) -> httpx.AsyncClient: return self._select_pooled_client() - def _endpoint_and_headers( - self, - headers: dict[str, str] | None, - ) -> tuple[str, dict[str, str]]: - request_headers = dict(headers or {}) - if self.auth_mode == "public": - return f"{self.api_base}{PUBLIC_SCORER_INVOKE_PATH}", request_headers - - if self.api_secret is None: - raise RuntimeError("Internal Luna auth mode is missing an API secret.") - request_headers["Authorization"] = f"Bearer {_internal_auth_token(self.api_secret)}" - return f"{self.api_base}{INTERNAL_SCORER_INVOKE_PATH}", request_headers + def _endpoint_and_auth_header(self) -> tuple[str, str]: + token = _internal_auth_token(self.api_secret) + endpoint = f"{self.runners_api_base}{RUNNERS_SCORER_INVOKE_PATH}" + return endpoint, f"Bearer {token}" async def invoke( self, *, - scorer_label: str | None = None, - scorer_id: str | None = None, + 
scorer_id: str, scorer_version_id: str | None = None, + scorer_label: str | None = None, input: JSONValue = None, output: JSONValue = None, config: JSONObject | None = None, timeout: float = DEFAULT_TIMEOUT_SECS, headers: dict[str, str] | None = None, ) -> ScorerInvokeResponse: - """Invoke a Galileo Luna scorer. + """Invoke a Galileo Luna scorer via runners-api. Args: - scorer_label: Preset, registered, or fine-tuned scorer label. - scorer_id: Optional Galileo scorer identifier. - scorer_version_id: Optional Galileo scorer version identifier. + scorer_id: Required scorer identifier. + scorer_version_id: Optional pinned scorer version identifier. + scorer_label: Optional display/metadata label. input: Optional user/system prompt text. output: Optional model response text. config: Optional scorer-specific configuration. @@ -479,24 +370,27 @@ async def invoke( Raises: ValueError: If neither input nor output is provided. RuntimeError: If the API response is not a JSON object. - httpx.HTTPStatusError: If the API returns an error status code. + httpx.HTTPStatusError: If runners-api returns an error status code. httpx.RequestError: If the request fails before a response is received. 
""" - if not (scorer_label or scorer_id or scorer_version_id): - raise ValueError("At least one scorer identifier must be provided.") if not (_has_value(input) or _has_value(output)): raise ValueError("At least one of input or output must be provided.") request_body = ScorerInvokeRequest( - scorer_label=scorer_label, scorer_id=scorer_id, scorer_version_id=scorer_version_id, + scorer_label=scorer_label, inputs=ScorerInvokeInputs( query="" if input is None else input, response="" if output is None else output ), - config=config, + config=config if config is not None else {}, ).to_dict() - endpoint, request_headers = self._endpoint_and_headers(headers) + + endpoint, auth_header = self._endpoint_and_auth_header() + request_headers = { + k: v for k, v in (headers or {}).items() if k.lower() not in _BLOCKED_REQUEST_HEADERS + } + request_headers["Authorization"] = auth_header logger.debug("[GalileoLunaClient] POST %s", endpoint) logger.debug("[GalileoLunaClient] Request body: %s", request_body) @@ -529,7 +423,7 @@ async def invoke( raise async def close(self) -> None: - """Close the HTTP client and release resources.""" + """Close HTTP clients and release resources.""" async with self._client_lock: clients: list[httpx.AsyncClient] = [] seen_client_ids: set[int] = set() diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py index 788fa24c..bb8a9804 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -32,9 +32,9 @@ class LunaEvaluatorConfig(EvaluatorConfig): """Configuration for direct Luna scorer evaluation. Attributes: - scorer_label: Preset, registered, or fine-tuned scorer label. - scorer_id: Optional Galileo scorer identifier. - scorer_version_id: Optional Galileo scorer version identifier. 
+ scorer_id: Required scorer identifier for runners-api invocation. + scorer_version_id: Optional pinned scorer version identifier. + scorer_label: Optional display/metadata label. threshold: Local threshold used by the evaluator for comparison. operator: Local comparison operator. Numeric operators use threshold as a number. scorer_config: Optional scorer-specific config sent as ``config``. @@ -42,20 +42,19 @@ class LunaEvaluatorConfig(EvaluatorConfig): timeout_ms: Request timeout in milliseconds. """ - scorer_label: str | None = Field( - default=None, + scorer_id: str = Field( min_length=1, - description="Luna scorer label to invoke.", + description="Required scorer identifier for runners-api invocation.", ) - scorer_id: str | None = Field( + scorer_version_id: str | None = Field( default=None, min_length=1, - description="Optional Galileo scorer identifier to invoke.", + description="Optional pinned scorer version identifier.", ) - scorer_version_id: str | None = Field( + scorer_label: str | None = Field( default=None, min_length=1, - description="Optional Galileo scorer version identifier to invoke.", + description="Optional display/metadata label.", ) threshold: JSONValue = Field( default=0.5, @@ -69,7 +68,7 @@ class LunaEvaluatorConfig(EvaluatorConfig): default=None, alias="config", serialization_alias="config", - description="Optional scorer-specific configuration sent to Galileo.", + description="Optional scorer-specific configuration sent to runners-api.", ) payload_field: LunaPayloadField = Field( default="input", @@ -88,10 +87,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): @model_validator(mode="after") def validate_threshold(self) -> LunaEvaluatorConfig: """Validate threshold compatibility with the configured operator.""" - if not (self.scorer_label or self.scorer_id or self.scorer_version_id): - raise ValueError( - "one of scorer_label, scorer_id, or scorer_version_id is required" - ) if self.operator in _NUMERIC_OPERATORS and 
coerce_number(self.threshold) is None: raise ValueError(f"operator '{self.operator}' requires a numeric threshold") if self.operator != "any" and self.threshold is None: diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index 1221cedb..d0140867 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -127,17 +127,13 @@ def __init__(self, config: LunaEvaluatorConfig) -> None: config: Validated LunaEvaluatorConfig instance. Raises: - ValueError: If neither GALILEO_API_SECRET_KEY nor GALILEO_API_KEY is set. + ValueError: If neither GALILEO_API_SECRET_KEY nor GALILEO_API_SECRET is set. """ - has_auth = ( - os.getenv("GALILEO_API_SECRET_KEY") - or os.getenv("GALILEO_API_SECRET") - or os.getenv("GALILEO_API_KEY") - ) - if not has_auth: + has_secret = os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") + if not has_secret: raise ValueError( - "GALILEO_API_SECRET_KEY or GALILEO_API_KEY environment variable must be set. " - "Set an API secret for internal auth or a Galileo API key before using " + "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required for Luna " + "runners-api invocation. Set one as an environment variable before using " "galileo.luna." 
) @@ -244,28 +240,30 @@ async def evaluate(self, data: Any) -> EvaluatorResult: return self._handle_error(exc) def _base_metadata(self) -> dict[str, Any]: - metadata = { - "scorer_label": self.config.scorer_label, - "scorer_id": self.config.scorer_id, - "scorer_version_id": self.config.scorer_version_id, - } - return {key: value for key, value in metadata.items() if value is not None} + metadata: dict[str, Any] = {"scorer_id": self.config.scorer_id} + if self.config.scorer_version_id is not None: + metadata["scorer_version_id"] = self.config.scorer_version_id + if self.config.scorer_label is not None: + metadata["scorer_label"] = self.config.scorer_label + return metadata def _scorer_kwargs(self) -> dict[str, Any]: - kwargs = { - "scorer_label": self.config.scorer_label, - "scorer_id": self.config.scorer_id, - "scorer_version_id": self.config.scorer_version_id, - } - return {key: value for key, value in kwargs.items() if value is not None} + kwargs: dict[str, Any] = {"scorer_id": self.config.scorer_id} + if self.config.scorer_version_id is not None: + kwargs["scorer_version_id"] = self.config.scorer_version_id + if self.config.scorer_label is not None: + kwargs["scorer_label"] = self.config.scorer_label + return kwargs def _metadata( self, response: ScorerInvokeResponse, ) -> dict[str, Any]: metadata: dict[str, Any] = self._base_metadata() + echoed_label = response.scorer_label or self.config.scorer_label + if echoed_label is not None: + metadata["scorer_label"] = echoed_label metadata.update({ - "scorer_label": response.scorer_label or self.config.scorer_label, "score": response.score, "threshold": self.config.threshold, "operator": self.config.operator, @@ -281,10 +279,8 @@ def _handle_error( ) -> EvaluatorResult: error_detail = str(error) metadata: dict[str, Any] = { + **self._base_metadata(), "error_type": type(error).__name__, - "scorer_label": self.config.scorer_label, - "scorer_id": self.config.scorer_id, - "scorer_version_id": self.config.scorer_version_id, 
} if isinstance(error, httpx.HTTPStatusError): metadata.update(_http_status_error_metadata(error)) diff --git a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py b/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py index e1518eec..c99d168d 100644 --- a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py +++ b/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py @@ -8,11 +8,23 @@ import json import os +from base64 import urlsafe_b64decode from unittest.mock import AsyncMock, MagicMock, patch import httpx import pytest +RUNNERS_ENV = { + "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", +} + + +def _decode_jwt_payload(token: str) -> dict[str, object]: + payload_segment = token.split(".")[1] + padded = payload_segment + ("=" * (-len(payload_segment) % 4)) + return json.loads(urlsafe_b64decode(padded.encode()).decode()) + # ============================================================================= # luna/evaluator.py: utility helpers @@ -52,8 +64,6 @@ class CannotJson: def __repr__(self): return "" - # json.dumps with default=str would actually serialize this, so use - # something that breaks both the JSON pass AND triggers TypeError. cannot = CannotJson() result = _coerce_payload_text({"obj": cannot}) @@ -108,7 +118,6 @@ def test_dict_threshold_matches_value(self): def test_other_types_return_false(self): from agent_control_evaluator_galileo.luna.evaluator import _contains - # Non-iterable score => no match. 
assert _contains(42, 42) is False @@ -133,7 +142,6 @@ def test_in_range_number_returned_as_is(self): def test_out_of_range_falls_back_to_one(self): from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score - # Above 1.0 → fall back to default confidence assert _confidence_from_score(7.2) == 1.0 def test_non_numeric_falls_back_to_one(self): @@ -150,11 +158,12 @@ def test_non_numeric_falls_back_to_one(self): @pytest.fixture def luna_evaluator(monkeypatch): """A ready-to-use LunaEvaluator instance with auth env wired up.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator return LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.5, "operator": "gte"} + {"scorer_id": "scorer-123", "threshold": 0.5, "operator": "gte"} ) @@ -162,15 +171,12 @@ class TestScoreMatchesOperators: """Every operator branch in ``_score_matches`` should evaluate.""" def _make(self, operator, threshold, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator - if operator in {"eq", "ne", "contains"}: - threshold_value = threshold - else: - threshold_value = threshold return LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": threshold_value, "operator": operator} + {"scorer_id": "scorer-123", "threshold": threshold, "operator": operator} ) def test_any_truthy_score_matches(self, monkeypatch): @@ -218,11 +224,12 @@ def test_numeric_operator_rejects_non_numeric_score(self, monkeypatch): class TestPreparePayload: """``_prepare_payload`` routes scalar data using explicit config.""" - def test_scalar_routed_to_input_when_label_lacks_output(self, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + def test_scalar_routed_to_input_by_default(self, 
monkeypatch): + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) input_text, output_text = evaluator._prepare_payload("hello") @@ -230,15 +237,12 @@ def test_scalar_routed_to_input_when_label_lacks_output(self, monkeypatch): assert output_text is None def test_scalar_routed_to_output_when_payload_field_is_output(self, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator evaluator = LunaEvaluator.from_dict( - { - "scorer_label": "toxicity", - "threshold": 0.5, - "payload_field": "output", - } + {"scorer_id": "scorer-123", "threshold": 0.5, "payload_field": "output"} ) input_text, output_text = evaluator._prepare_payload("hello") @@ -246,32 +250,13 @@ def test_scalar_routed_to_output_when_payload_field_is_output(self, monkeypatch) assert input_text is None assert output_text == "hello" - def test_scalar_output_label_without_payload_field_still_defaults_to_input( - self, - monkeypatch, - ): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator - - evaluator = LunaEvaluator.from_dict( - {"scorer_label": "output_correctness", "threshold": 0.5} - ) - - input_text, output_text = evaluator._prepare_payload("hello") - - assert input_text == "hello" - assert output_text is None - def test_structured_payload_uses_input_output_keys_over_payload_field(self, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator evaluator = LunaEvaluator.from_dict( - { - "scorer_label": 
"toxicity", - "threshold": 0.5, - "payload_field": "output", - } + {"scorer_id": "scorer-123", "threshold": 0.5, "payload_field": "output"} ) input_text, output_text = evaluator._prepare_payload( @@ -285,10 +270,11 @@ def test_structured_payload_uses_input_output_keys_over_payload_field(self, monk @pytest.mark.asyncio async def test_evaluator_aclose_closes_underlying_client(monkeypatch): """``aclose`` must release the eagerly-created client without clearing it.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) fake = MagicMock() fake.close = AsyncMock() @@ -303,12 +289,13 @@ async def test_evaluator_aclose_closes_underlying_client(monkeypatch): @pytest.mark.asyncio async def test_evaluator_handles_non_success_status(monkeypatch): """A non-success status from the scorer must surface as an error result.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse from agent_control_evaluator_galileo.luna.client import GalileoLunaClient evaluator = LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.5, "operator": "gte"} + {"scorer_id": "scorer-123", "threshold": 0.5, "operator": "gte"} ) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: @@ -417,198 +404,150 @@ def test_scalar_other_types_have_value(self): from agent_control_evaluator_galileo.luna.client import _has_value assert _has_value(42) is True - assert _has_value(0) is True # 0 is a real value, not empty + assert _has_value(0) is True assert _has_value(True) is True class 
TestScorerInvokeRequestValidation: """``ScorerInvokeRequest`` rejects malformed input combos.""" - def test_missing_all_identifiers_raises(self): + def test_missing_scorer_id_raises(self): from agent_control_evaluator_galileo.luna.client import ( ScorerInvokeInputs, ScorerInvokeRequest, ) from pydantic import ValidationError - with pytest.raises(ValidationError, match="One of scorer_label"): + with pytest.raises(ValidationError, match="scorer_id"): ScorerInvokeRequest(inputs=ScorerInvokeInputs(query="hello")) -def test_client_raises_when_no_credentials(monkeypatch): - """The client requires at least an API secret or an API key.""" - for name in ( - "GALILEO_API_SECRET_KEY", - "GALILEO_API_SECRET", - "GALILEO_API_KEY", - "GALILEO_LUNA_AUTH_MODE", - ): +def test_client_raises_when_no_api_secret(monkeypatch): + """The client requires GALILEO_API_SECRET_KEY or GALILEO_API_SECRET.""" + for name in ("GALILEO_API_SECRET_KEY", "GALILEO_API_SECRET"): monkeypatch.delenv(name, raising=False) + monkeypatch.setenv("GALILEO_RUNNERS_API_URL", "http://runners-api:8090") from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY"): + with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_SECRET"): GalileoLunaClient() -def test_client_requires_explicit_mode_when_both_credentials_are_present(monkeypatch): - """A mixed credential environment must not silently choose an auth route.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") - monkeypatch.delenv("GALILEO_LUNA_AUTH_MODE", raising=False) +def test_client_raises_when_no_runners_api_url(monkeypatch): + """The client requires GALILEO_RUNNERS_API_URL.""" + monkeypatch.setenv("GALILEO_API_SECRET_KEY", "test-secret") + monkeypatch.delenv("GALILEO_RUNNERS_API_URL", raising=False) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with 
pytest.raises( - ValueError, match="Both a Galileo API key and a Galileo API secret are configured" - ): + with pytest.raises(ValueError, match="GALILEO_RUNNERS_API_URL"): GalileoLunaClient() -def test_client_uses_explicit_public_mode_when_both_credentials_are_present(monkeypatch): - """Explicit public mode should use the API-key route even if a secret is also set.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "public") +def test_client_jwt_has_internal_scope(monkeypatch): + """JWT produced by the client must carry internal=True and scope=scorers.invoke.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): - client = GalileoLunaClient() + client = GalileoLunaClient() + _, auth_header = client._endpoint_and_auth_header() - assert client.auth_mode == "public" - endpoint, request_headers = client._endpoint_and_headers(None) - assert endpoint.endswith("/scorers/invoke") - assert "Authorization" not in request_headers + assert auth_header.startswith("Bearer ") + payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) + assert payload["internal"] is True + assert payload["scope"] == "scorers.invoke" -def test_client_uses_explicit_internal_mode_when_both_credentials_are_present(monkeypatch): - """Explicit internal mode should use the internal JWT route.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "internal") +def test_client_posts_to_correct_runners_api_endpoint(monkeypatch): + """_endpoint_and_auth_header must return the runners-api /api/v1/scorers/invoke path.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, 
value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): - client = GalileoLunaClient() - - assert client.auth_mode == "internal" - endpoint, request_headers = client._endpoint_and_headers(None) - assert endpoint.endswith("/internal/scorers/invoke") - assert request_headers["Authorization"].startswith("Bearer ") - - -def test_client_rejects_mode_without_matching_credential(monkeypatch): - """The selected mode must have its matching credential configured.""" - monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) - monkeypatch.delenv("GALILEO_API_SECRET", raising=False) - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "internal") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + client = GalileoLunaClient() + endpoint, _ = client._endpoint_and_auth_header() - with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): - with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY"): - GalileoLunaClient() + assert endpoint == "http://runners-api:8090/api/v1/scorers/invoke" -def test_client_rejects_invalid_auth_mode(monkeypatch): - """Invalid auth mode values should fail during client initialization.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "sideways") +def test_client_does_not_use_old_api_paths(monkeypatch): + """The client must not reference /scorers/invoke or /internal/scorers/invoke.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): - with pytest.raises(ValueError, match="GALILEO_LUNA_AUTH_MODE"): - GalileoLunaClient() - - -class TestDeriveApiUrl: - """URL derivation covers every console.* → api.* 
substitution branch.""" - - def _client(self, monkeypatch): - monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) - monkeypatch.delenv("GALILEO_API_SECRET", raising=False) - monkeypatch.delenv("GALILEO_LUNA_AUTH_MODE", raising=False) - monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - - return GalileoLunaClient() - - def test_console_dot_rewritten_to_api_dot(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://console.galileo.ai") - == "https://api.galileo.ai" - ) - - def test_console_dash_rewritten_to_api_dash(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://console-staging.galileo.ai") - == "https://api-staging.galileo.ai" - ) - - def test_plain_https_host_gets_api_prefix(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://example.com") - == "https://api.example.com" - ) - - def test_non_prefix_console_substring_gets_api_prefix(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://my-console.example.com") - == "https://api.my-console.example.com" - ) - - def test_console_substring_in_path_does_not_rewrite_path(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://app.galileo.ai/console.html") - == "https://api.app.galileo.ai/console.html" - ) - - def test_plain_http_host_gets_api_prefix(self, monkeypatch): - client = self._client(monkeypatch) - assert client._derive_api_url("http://example.com") == "http://api.example.com" + client = GalileoLunaClient() + endpoint, _ = client._endpoint_and_auth_header() - def test_unknown_scheme_returned_as_is(self, monkeypatch): - client = self._client(monkeypatch) - # No console./console- prefix, no http(s) scheme → return unchanged. 
- assert client._derive_api_url("api.example.com") == "api.example.com" + assert "/scorers/invoke" in endpoint + assert endpoint.startswith("http://runners-api:8090/api/v1/") + assert "/internal/scorers/invoke" not in endpoint @pytest.mark.asyncio -async def test_get_client_adds_api_key_header_when_no_secret(monkeypatch): - """When only an API key is configured, the public-API header is set.""" - monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) - monkeypatch.delenv("GALILEO_API_SECRET", raising=False) - monkeypatch.setenv("GALILEO_API_KEY", "public-key") +async def test_get_client_does_not_set_galileo_api_key_header(monkeypatch): + """The HTTP client must never include a Galileo-API-Key header.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() http_client = await client._get_client() try: - assert http_client.headers.get("Galileo-API-Key") == "public-key" + assert "Galileo-API-Key" not in http_client.headers + assert "galileo-api-key" not in http_client.headers finally: await client.close() @pytest.mark.asyncio -async def test_invoke_rejects_missing_scorer_identifier(monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") +async def test_get_client_uses_configured_runners_api_ca_file(monkeypatch): + """The HTTP client should verify internal runners-api TLS with the configured CA.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + monkeypatch.setenv("GALILEO_RUNNERS_API_CA_FILE", "/etc/galileo/runners-api-ca.crt") from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - client = GalileoLunaClient() - try: - with pytest.raises(ValueError, match="At least one scorer identifier"): - await client.invoke(input="hello") - finally: - await client.close() + ssl_context = object() + with ( + patch.object(GalileoLunaClient, "_load_ssl_context", return_value=ssl_context), + 
patch("httpx.AsyncClient") as async_client, + ): + client = GalileoLunaClient() + await client._get_client() + + assert client.runners_api_ca_file == "/etc/galileo/runners-api-ca.crt" + assert async_client.call_args.kwargs["verify"] is ssl_context + + +@pytest.mark.asyncio +async def test_get_client_falls_back_to_agent_control_auth_upstream_ca_file(monkeypatch): + """Galileo in-cluster Agent Control pods already mount the internal CA for auth upstream.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + monkeypatch.delenv("GALILEO_RUNNERS_API_CA_FILE", raising=False) + monkeypatch.setenv( + "AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE", "/etc/agent-control/auth-upstream-ca/ca.crt" + ) + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + ssl_context = object() + with ( + patch.object(GalileoLunaClient, "_load_ssl_context", return_value=ssl_context), + patch("httpx.AsyncClient") as async_client, + ): + client = GalileoLunaClient() + await client._get_client() + + assert client.runners_api_ca_file == "/etc/agent-control/auth-upstream-ca/ca.crt" + assert async_client.call_args.kwargs["verify"] is ssl_context @pytest.mark.asyncio async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): """A non-object JSON body must surface as a clear RuntimeError.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -624,7 +563,7 @@ async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): try: with pytest.raises(RuntimeError, match="not a JSON object"): - await client.invoke(scorer_label="toxicity", input="hello") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() @@ -632,7 +571,8 @@ async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): @pytest.mark.asyncio 
async def test_invoke_propagates_http_status_error(monkeypatch): """The client logs and re-raises HTTP status errors.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -653,7 +593,7 @@ async def test_invoke_propagates_http_status_error(monkeypatch): try: with pytest.raises(httpx.HTTPStatusError): - await client.invoke(scorer_label="toxicity", input="hello") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() @@ -661,7 +601,8 @@ async def test_invoke_propagates_http_status_error(monkeypatch): @pytest.mark.asyncio async def test_invoke_propagates_request_error(monkeypatch): """RequestError is logged and re-raised so callers can decide policy.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -673,7 +614,7 @@ async def test_invoke_propagates_request_error(monkeypatch): try: with pytest.raises(httpx.RequestError): - await client.invoke(scorer_label="toxicity", input="hello") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() @@ -681,13 +622,68 @@ async def test_invoke_propagates_request_error(monkeypatch): @pytest.mark.asyncio async def test_client_async_context_manager_closes_on_exit(monkeypatch): """Entering/exiting the async context manager must close the client.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient async with GalileoLunaClient() as client: - # Trigger lazy client creation so close() has work to do. 
await client._get_client() assert client._client is not None - # __aexit__ closes the underlying httpx client. assert client._client is None + + +@pytest.mark.asyncio +async def test_invoke_strips_caller_supplied_galileo_api_key_header(monkeypatch): + """Regression: a Galileo-API-Key passed via the headers kwarg must be stripped.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["headers"] = dict(request.headers) + return httpx.Response(200, json={"score": 0.9, "status": "success"}) + + client = GalileoLunaClient() + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + + try: + await client.invoke( + scorer_id="scorer-123", + input="hello", + headers={"Galileo-API-Key": "should-be-stripped", "X-Custom": "keep-me"}, + ) + finally: + await client.close() + + headers = captured["headers"] + assert isinstance(headers, dict) + assert "galileo-api-key" not in headers + assert headers.get("x-custom") == "keep-me" + + +@pytest.mark.asyncio +async def test_invoke_always_emits_config_field(monkeypatch): + """Regression: config must always be present in the request body, defaulting to {}.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["body"] = json.loads(request.content.decode()) + return httpx.Response(200, json={"score": 0.5, "status": "success"}) + + client = GalileoLunaClient() + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + + try: + await client.invoke(scorer_id="scorer-123", input="hello") + finally: + await client.close() + + assert "config" in captured["body"] + assert captured["body"]["config"] == 
{} diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 86328e1c..00324462 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -4,7 +4,6 @@ import asyncio import json -import logging import os from base64 import urlsafe_b64decode from unittest.mock import AsyncMock, patch @@ -14,6 +13,11 @@ from agent_control_models import EvaluatorResult from pydantic import ValidationError +RUNNERS_ENV = { + "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", +} + def _decode_jwt_payload(token: str) -> dict[str, object]: payload_segment = token.split(".")[1] @@ -24,23 +28,21 @@ def _decode_jwt_payload(token: str) -> dict[str, object]: class TestLunaEvaluatorConfig: """Tests for direct Luna evaluator configuration.""" - def test_config_accepts_direct_scorer_fields(self) -> None: + def test_config_accepts_scorer_id_with_all_optional_fields(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig - # Given: a direct scorer config with local thresholding config = LunaEvaluatorConfig( - scorer_label="toxicity", scorer_id="scorer-123", scorer_version_id="version-123", + scorer_label="toxicity", threshold=0.7, operator="gte", config={"temperature": 0}, ) - # Then: config is retained without Protect concepts - assert config.scorer_label == "toxicity" assert config.scorer_id == "scorer-123" assert config.scorer_version_id == "version-123" + assert config.scorer_label == "toxicity" assert config.threshold == 0.7 assert config.operator == "gte" assert config.scorer_config == {"temperature": 0} @@ -54,40 +56,58 @@ def test_config_accepts_scorer_id_without_label(self) -> None: assert config.scorer_id == "scorer-123" assert config.scorer_label is None - def test_config_requires_a_scorer_identifier(self) -> None: + def test_config_requires_scorer_id(self) -> None: 
from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig - with pytest.raises(ValidationError, match="one of scorer_label"): + with pytest.raises(ValidationError, match="scorer_id"): LunaEvaluatorConfig(threshold=0.5) + def test_config_rejects_label_only(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + with pytest.raises(ValidationError, match="scorer_id"): + LunaEvaluatorConfig(scorer_label="toxicity", threshold=0.5) + + def test_config_rejects_version_only(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + with pytest.raises(ValidationError, match="scorer_id"): + LunaEvaluatorConfig(scorer_version_id="version-123", threshold=0.5) + def test_numeric_operator_requires_numeric_threshold(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig - # Given/When/Then: numeric local comparison rejects non-numeric thresholds with pytest.raises(ValidationError, match="numeric threshold"): - LunaEvaluatorConfig(scorer_label="toxicity", threshold="high", operator="gte") + LunaEvaluatorConfig(scorer_id="scorer-123", threshold="high", operator="gte") class TestGalileoLunaClient: """Tests for the GalileoLunaClient HTTP contract.""" - def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: + def test_scorer_invoke_request_requires_scorer_id(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest + + with pytest.raises(ValidationError, match="scorer_id"): + ScorerInvokeRequest( + scorer_label="toxicity", + inputs=ScorerInvokeInputs(query="hello"), + ) + + def test_scorer_invoke_request_shape_with_all_fields(self) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest - # Given: a scorer request with scorer config request = ScorerInvokeRequest( - scorer_label="toxicity", scorer_id="scorer-123", scorer_version_id="version-123", + scorer_label="toxicity", 
inputs=ScorerInvokeInputs(query={"messages": [{"role": "user", "content": "hello"}]}), config={"top_k": 1}, ) - # Then: the serialized payload uses the API-owned scorer invoke fields assert request.to_dict() == { - "scorer_label": "toxicity", "scorer_id": "scorer-123", "scorer_version_id": "version-123", + "scorer_label": "toxicity", "inputs": { "query": {"messages": [{"role": "user", "content": "hello"}]}, "response": "", @@ -95,23 +115,35 @@ def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: "config": {"top_k": 1}, } + def test_scorer_invoke_request_omits_optional_fields_when_absent(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest + + request = ScorerInvokeRequest( + scorer_id="scorer-123", + inputs=ScorerInvokeInputs(query="hello"), + ) + + body = request.to_dict() + assert body["scorer_id"] == "scorer-123" + assert "scorer_version_id" not in body + assert "scorer_label" not in body + assert body["config"] == {} + @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) def test_scorer_invoke_request_requires_input_or_output(self, empty_value: object) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeRequest - # Given/When/Then: the request mirrors API validation with pytest.raises( ValidationError, match="Either inputs.query or inputs.response must be set" ): ScorerInvokeRequest( - scorer_label="toxicity", + scorer_id="scorer-123", inputs={"query": empty_value, "response": empty_value}, ) - def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: + def test_scorer_invoke_response_shape(self) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeResponse - # Given: an API scorer invoke response response = ScorerInvokeResponse.from_dict( { "scorer_label": "toxicity", @@ -122,7 +154,6 @@ def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: } ) - # Then: the model exposes the API response fields assert 
response.model_dump() == { "scorer_label": "toxicity", "score": 0.82, @@ -130,137 +161,35 @@ def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: "execution_time": 0.12, "error_message": None, } - assert response.scorer_label == "toxicity" assert response.raw_response["scorer_label"] == "toxicity" - def test_client_uses_protect_api_url_derivation(self) -> None: + def test_client_strips_whitespace_from_runners_api_url(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient - # Given: the same console URL shape used by Protect - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): - client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") - - # Then: the API URL is derived the same way - assert client.api_base == "https://api.demo-v2.galileocloud.io" - - def test_client_uses_galileo_api_url_when_set(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: an explicit custom-environment API URL with patch.dict( os.environ, - { - "GALILEO_API_KEY": "test-key", - "GALILEO_API_URL": "https://api-test-luna.example.com/", - }, + RUNNERS_ENV | {"GALILEO_RUNNERS_API_URL": " http://runners-api:8090/ "}, clear=True, ): - client = GalileoLunaClient(console_url="https://console-test-luna.example.com") + client = GalileoLunaClient() - # Then: the explicit API URL wins over console URL derivation - assert client.api_base == "https://api-test-luna.example.com" + assert client.runners_api_base == "http://runners-api:8090" - def test_client_uses_luna_api_url_when_set(self) -> None: + def test_client_rejects_unreadable_runners_api_ca_bundle(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient - # Given: a Luna-specific API URL and a general API URL are both configured with patch.dict( os.environ, - { - "GALILEO_API_KEY": "test-key", - "GALILEO_LUNA_API_URL": "https://luna-api.example.com/", - "GALILEO_API_URL": 
"https://api.example.com", - }, + RUNNERS_ENV | {"GALILEO_RUNNERS_API_CA_FILE": "/nonexistent/ca.pem"}, clear=True, ): - client = GalileoLunaClient(console_url="https://console.example.com") - - # Then: the Luna-specific URL wins without changing the general API URL contract - assert client.api_base == "https://luna-api.example.com" - - def test_client_uses_luna_api_url_for_internal_auth(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: internal auth and both Luna-specific and general API URLs are configured - with patch.dict( - os.environ, - { - "GALILEO_API_SECRET_KEY": "test-secret", - "GALILEO_LUNA_API_URL": "https://internal-api.example.com", - "GALILEO_API_URL": "https://api-public.example.com", - }, - clear=True, - ): - client = GalileoLunaClient(console_url="https://console.example.com") - - # Then: internal scorer invocation uses the Luna-specific API base - assert client.api_base == "https://internal-api.example.com" - - def test_client_derives_api_url_from_console_dash_hostname(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: a console- hostname - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): - client = GalileoLunaClient(console_url="https://console-test-luna.example.com") - - # Then: the matching api- hostname is used - assert client.api_base == "https://api-test-luna.example.com" - - def test_client_strips_whitespace_from_env_url(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: a URL override padded with whitespace and a trailing slash - with patch.dict( - os.environ, - { - "GALILEO_API_KEY": "test-key", - "GALILEO_LUNA_API_URL": " https://luna-api.example.com/ ", - }, - clear=True, - ): - client = GalileoLunaClient(console_url="https://console.example.com") - - # Then: the resolved base URL is trimmed and slash-free - assert client.api_base == "https://luna-api.example.com" - - def 
test_client_warns_when_deprecated_auth_mode_env_is_set( - self, caplog: pytest.LogCaptureFixture - ) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: the deprecated auth-mode environment variable - caplog.set_level(logging.WARNING) - with patch.dict( - os.environ, - {"GALILEO_API_KEY": "test-key", "GALILEO_LUNA_AUTH_MODE": "public"}, - clear=True, - ): - # When/Then: construction still works but emits a deprecation warning - with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): - client = GalileoLunaClient(console_url="https://console.example.com") - - assert client.auth_mode == "public" - assert "GALILEO_LUNA_AUTH_MODE is deprecated" in caplog.text - - def test_client_rejects_unreadable_ca_bundle(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: a CA bundle path that does not exist - with patch.dict( - os.environ, - { - "GALILEO_API_SECRET_KEY": "test-secret", - "GALILEO_LUNA_CA_FILE": "/nonexistent/ca.pem", - }, - clear=True, - ): - # When/Then: client construction fails fast instead of at first request with pytest.raises(ValueError, match="Failed to load CA bundle"): - GalileoLunaClient(console_url="https://console.example.com") + GalileoLunaClient() @pytest.mark.asyncio async def test_client_applies_ca_bundle_and_connection_limits(self) -> None: - import certifi + import ssl + from agent_control_evaluator_galileo.luna import GalileoLunaClient from agent_control_evaluator_galileo.luna.client import ( DEFAULT_KEEPALIVE_EXPIRY_SECS, @@ -275,11 +204,12 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: captured.update(kwargs) return real_async_client(**kwargs) - # Given: internal auth with a CA bundle configured - with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): - client = GalileoLunaClient( - console_url="https://console.example.com", ca_file=certifi.where() - ) + ssl_context = 
ssl.create_default_context() + with ( + patch.dict(os.environ, RUNNERS_ENV, clear=True), + patch.object(GalileoLunaClient, "_load_ssl_context", return_value=ssl_context), + ): + client = GalileoLunaClient(runners_api_ca_file="/etc/runners-api-ca.pem") with patch( "agent_control_evaluator_galileo.luna.client.httpx.AsyncClient", recording_client @@ -289,8 +219,6 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: finally: await client.close() - # Then: TLS verification uses the configured CA bundle and pooled - # connections expire quickly so closed server sockets are not reused assert captured["verify"] is client._ssl_context limits = captured["limits"] assert isinstance(limits, httpx.Limits) @@ -313,13 +241,14 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: os.environ, { "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", "GALILEO_LUNA_KEEPALIVE_EXPIRY_SECONDS": "0.25", "GALILEO_LUNA_MAX_CONNECTIONS": "17", "GALILEO_LUNA_MAX_KEEPALIVE_CONNECTIONS": "4", }, clear=True, ): - client = GalileoLunaClient(console_url="https://console.example.com") + client = GalileoLunaClient() with patch( "agent_control_evaluator_galileo.luna.client.httpx.AsyncClient", recording_client @@ -351,6 +280,7 @@ def test_client_ignores_empty_connection_tuning_env(self) -> None: os.environ, { "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", "GALILEO_LUNA_KEEPALIVE_EXPIRY_SECONDS": "", "GALILEO_LUNA_MAX_CONNECTIONS": " ", "GALILEO_LUNA_MAX_KEEPALIVE_CONNECTIONS": "", @@ -358,7 +288,7 @@ def test_client_ignores_empty_connection_tuning_env(self) -> None: }, clear=True, ): - client = GalileoLunaClient(console_url="https://console.example.com") + client = GalileoLunaClient() assert client.keepalive_expiry_seconds == DEFAULT_KEEPALIVE_EXPIRY_SECS assert client.max_connections == DEFAULT_MAX_CONNECTIONS @@ -389,11 +319,12 @@ def recording_client(**kwargs: object) -> FakeAsyncClient: 
os.environ, { "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", "GALILEO_LUNA_CLIENT_POOL_SIZE": "3", }, clear=True, ): - client = GalileoLunaClient(console_url="https://console.example.com") + client = GalileoLunaClient() with patch.object(luna_client_module.httpx, "AsyncClient", recording_client): try: @@ -420,11 +351,12 @@ async def aclose(self) -> None: os.environ, { "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", "GALILEO_LUNA_CLIENT_POOL_SIZE": "2", }, clear=True, ): - client = GalileoLunaClient(console_url="https://console.example.com") + client = GalileoLunaClient() first_client = FakeAsyncClient() second_client = FakeAsyncClient() @@ -454,8 +386,8 @@ class FakeAsyncClient: async def aclose(self) -> None: self.is_closed = True - with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): - client = GalileoLunaClient(console_url="https://console.example.com") + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() http_client = FakeAsyncClient() client._client = http_client # type: ignore[assignment] @@ -510,15 +442,15 @@ def test_client_reports_invalid_connection_tuning_env( ) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient - env = {"GALILEO_API_SECRET_KEY": "test-secret"} | env_values + env = RUNNERS_ENV | env_values with patch.dict(os.environ, env, clear=True): with pytest.raises(ValueError) as exc_info: - GalileoLunaClient(console_url="https://console.example.com") + GalileoLunaClient() assert expected in str(exc_info.value) @pytest.mark.asyncio - async def test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: + async def test_client_posts_to_runners_api_scorer_invoke(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} @@ -537,21 +469,14 @@ def handler(request: httpx.Request) -> httpx.Response: }, 
) - # Given: a Luna client with a mock HTTP transport - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): - client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") - client._client = httpx.AsyncClient( - transport=httpx.MockTransport(handler), - headers={ - "Galileo-API-Key": client.api_key, - "Content-Type": "application/json", - }, - ) + # Given: a Luna client pointing at runners-api + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) try: - # When: invoking a scorer response = await client.invoke( - scorer_label="toxicity", + scorer_id="scorer-123", input="user prompt", output="model answer", config={"top_k": 1}, @@ -559,103 +484,74 @@ def handler(request: httpx.Request) -> httpx.Response: finally: await client.close() - # Then: the direct scorer endpoint and body are used + # Then: posts to runners-api /api/v1/scorers/invoke with JWT, no Galileo-API-Key assert response.score == 0.82 - assert captured["url"] == "https://api.demo-v2.galileocloud.io/scorers/invoke" + assert captured["url"] == "http://runners-api:8090/api/v1/scorers/invoke" assert captured["body"] == { - "scorer_label": "toxicity", + "scorer_id": "scorer-123", "inputs": {"query": "user prompt", "response": "model answer"}, "config": {"top_k": 1}, } - assert "stage_name" not in captured["body"] - assert "prioritized_rulesets" not in captured["body"] headers = captured["headers"] assert isinstance(headers, dict) - assert headers["galileo-api-key"] == "test-key" + assert "galileo-api-key" not in headers + auth_header = headers["authorization"] + assert isinstance(auth_header, str) + assert auth_header.startswith("Bearer ") + payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) + assert payload["internal"] is True + assert payload["scope"] == "scorers.invoke" @pytest.mark.asyncio - async def 
test_client_uses_internal_jwt_when_api_secret_is_set(self) -> None: + async def test_client_forwards_scorer_version_id_when_configured(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} def handler(request: httpx.Request) -> httpx.Response: - captured["url"] = str(request.url) - captured["headers"] = dict(request.headers) captured["body"] = json.loads(request.content.decode()) return httpx.Response( - 200, - json={ - "scorer_label": "toxicity", - "score": 0.82, - "status": "success", - "execution_time": 0.12, - }, + 200, json={"score": 0.5, "status": "success"} ) - # Given: a Luna client configured with the Galileo API internal secret - with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): - client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) try: - # When: invoking a scorer with internal JWT auth - response = await client.invoke(scorer_label="toxicity", output="model answer") + await client.invoke( + scorer_id="scorer-123", + scorer_version_id="version-456", + input="hello", + ) finally: await client.close() - # Then: the internal scorer endpoint is called with an internal JWT - assert response.score == 0.82 - assert ( - captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke" - ) - assert captured["body"] == { - "scorer_label": "toxicity", - "inputs": {"query": "", "response": "model answer"}, - } - headers = captured["headers"] - assert isinstance(headers, dict) - assert "galileo-api-key" not in headers - auth_header = headers["authorization"] - assert isinstance(auth_header, str) - assert auth_header.startswith("Bearer ") - token_payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) - assert token_payload["internal"] is True - assert 
token_payload["scope"] == "scorers.invoke" + assert captured["body"]["scorer_version_id"] == "version-456" @pytest.mark.asyncio - async def test_client_uses_internal_jwt_without_api_key(self) -> None: + async def test_client_omits_galileo_api_key_even_when_env_is_set(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient - # Given: a Luna client configured with internal JWT auth - with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): - client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") - captured: dict[str, object] = {} def handler(request: httpx.Request) -> httpx.Response: captured["headers"] = dict(request.headers) - return httpx.Response( - 200, - json={"scorer_label": "toxicity", "score": 0.82, "status": "success"}, - ) + return httpx.Response(200, json={"score": 0.5, "status": "success"}) + env = {**RUNNERS_ENV, "GALILEO_API_KEY": "should-not-be-sent"} + with patch.dict(os.environ, env, clear=True): + client = GalileoLunaClient() client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + try: - # When: invoking without project context - response = await client.invoke(scorer_label="toxicity", output="model answer") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() - # Then: internal JWT auth still works - assert response.score == 0.82 headers = captured["headers"] assert isinstance(headers, dict) - auth_header = headers["authorization"] - assert isinstance(auth_header, str) - token_payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) - assert token_payload["internal"] is True - assert token_payload["scope"] == "scorers.invoke" + assert "galileo-api-key" not in headers @pytest.mark.asyncio @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) @@ -664,19 +560,17 @@ async def test_client_rejects_missing_input_and_output_values( ) -> None: from agent_control_evaluator_galileo.luna import 
GalileoLunaClient - # Given: a Luna client and scorer input values that API treats as missing - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): - client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() - # When/Then: the client rejects the request before calling API with pytest.raises(ValueError, match="At least one of input or output must be provided"): - await client.invoke(scorer_label="toxicity", input=empty_value, output=empty_value) + await client.invoke(scorer_id="scorer-123", input=empty_value, output=empty_value) class TestLunaEvaluator: """Tests for direct Luna evaluator behavior.""" - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) def test_evaluator_metadata(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator @@ -687,31 +581,26 @@ def test_evaluator_metadata(self) -> None: def test_evaluator_init_without_auth_raises(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator - with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_KEY"): - LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_SECRET"): + LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) - @patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True) + @patch.dict(os.environ, RUNNERS_ENV, clear=True) def test_evaluator_init_accepts_api_secret(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator - evaluator = LunaEvaluator.from_dict( - { - "scorer_label": "toxicity", - "threshold": 0.5, - } - ) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) - assert evaluator.config.scorer_label == "toxicity" + assert evaluator.config.scorer_id == "scorer-123" - 
@patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: a direct Luna evaluator and a raw successful scorer response evaluator = LunaEvaluator.from_dict( { + "scorer_id": "scorer-123", "scorer_label": "toxicity", "threshold": 0.7, "operator": "gte", @@ -727,7 +616,6 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: execution_time=0.1, ) - # When: evaluating a full step payload result = await evaluator.evaluate( { "input": "user prompt", @@ -735,11 +623,11 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: } ) - # Then: the raw score is thresholded locally and no Protect fields are sent assert isinstance(result, EvaluatorResult) assert result.matched is True assert result.confidence == 0.82 assert result.metadata == { + "scorer_id": "scorer-123", "scorer_label": "toxicity", "score": 0.82, "threshold": 0.7, @@ -749,6 +637,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: "error_message": None, } mock_invoke.assert_awaited_once_with( + scorer_id="scorer-123", scorer_label="toxicity", input="user prompt", output="model answer", @@ -756,15 +645,48 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: timeout=5.0, ) - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) + @pytest.mark.asyncio + async def test_evaluator_forwards_configured_scorer_version_id(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + evaluator = LunaEvaluator.from_dict( + { + "scorer_id": 
"scorer-123", + "scorer_version_id": "version-456", + "threshold": 0.5, + "operator": "gte", + } + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.return_value = ScorerInvokeResponse( + score=0.82, + status="success", + ) + + result = await evaluator.evaluate("hello") + + assert result.matched is True + assert result.metadata["scorer_version_id"] == "version-456" + mock_invoke.assert_awaited_once_with( + scorer_id="scorer-123", + scorer_version_id="version-456", + input="hello", + output=None, + config=None, + timeout=10.0, + ) + + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_returns_non_match_below_threshold(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: a raw scorer value below the local threshold evaluator = LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.7, "operator": "gte"} + {"scorer_id": "scorer-123", "threshold": 0.7, "operator": "gte"} ) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: @@ -774,56 +696,48 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: status="success", ) - # When: evaluating selected scalar data result = await evaluator.evaluate("hello") - # Then: the control does not match assert result.matched is False assert result.confidence == 0.2 mock_invoke.assert_awaited_once_with( - scorer_label="toxicity", + scorer_id="scorer-123", input="hello", output=None, config=None, timeout=10.0, ) - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio @pytest.mark.parametrize("data", ["", " "]) async def test_evaluator_does_not_call_api_for_empty_data(self, data: str) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator from 
agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: an evaluator and empty selected data - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: - # When: evaluating empty data result = await evaluator.evaluate(data) - # Then: no remote scorer call is made assert result.matched is False assert result.confidence == 1.0 assert result.message == "No data to score with Luna" mock_invoke.assert_not_called() - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_fail_open_sets_error(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: fixed fail-open behavior for scorer errors - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: mock_invoke.side_effect = RuntimeError("service unavailable") - # When: the scorer call fails result = await evaluator.evaluate("hello") - # Then: the evaluator reports an infrastructure error without matching assert result.matched is False assert result.error == "service unavailable" assert result.metadata is not None @@ -831,16 +745,18 @@ async def test_evaluator_fail_open_sets_error(self) -> None: assert result.metadata["error_type"] == "RuntimeError" assert "fallback_action" not in result.metadata - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_error_metadata_includes_http_status_context(self) -> None: from 
agent_control_evaluator_galileo.luna import LunaEvaluator from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict( + {"scorer_id": "scorer-123", "scorer_label": "toxicity", "threshold": 0.5} + ) request = httpx.Request( "POST", - "https://api.example.test/internal/scorers/invoke?token=secret", + "http://runners-api:8090/api/v1/scorers/invoke?token=secret", ) response = httpx.Response( 503, @@ -863,7 +779,7 @@ async def test_evaluator_error_metadata_includes_http_status_context(self) -> No assert result.metadata["error_type"] == "HTTPStatusError" assert result.metadata["http_status_code"] == 503 assert result.metadata["http_method"] == "POST" - assert result.metadata["http_endpoint_path"] == "/internal/scorers/invoke" + assert result.metadata["http_endpoint_path"] == "/api/v1/scorers/invoke" assert result.metadata["http_response_content_type"] == "application/json" assert result.metadata["http_response_body"] == '{"detail":"busy"}' assert result.metadata["http_response_body_truncated"] is False diff --git a/examples/README.md b/examples/README.md index ffe9b46a..daa27094 100644 --- a/examples/README.md +++ b/examples/README.md @@ -13,7 +13,7 @@ This directory contains runnable examples for Agent Control. Each example has it | Google ADK Decorator | Tool-only `@control()` pattern for explicit ADK tool protection. | https://docs.agentcontrol.dev/examples/google-adk-decorator | | Customer Support Agent | Enterprise scenario with PII protection, prompt-injection defense, and multiple tools. | https://docs.agentcontrol.dev/examples/customer-support | | DeepEval | Build a custom evaluator using DeepEval GEval metrics. | https://docs.agentcontrol.dev/examples/deepeval | -| Galileo Luna Direct | Direct `/scorers/invoke` Luna evaluation with a composite Agent Control condition. 
| `examples/galileo_luna/` | +| Galileo Luna Direct | Direct `/api/v1/scorers/invoke` Luna evaluation (`scorer_id` required; `scorer_label`/`scorer_version_id` optional) with a composite Agent Control condition. | `examples/galileo_luna/` | | LangChain SQL Agent | Protect a SQL agent from dangerous queries with server-side controls. | https://docs.agentcontrol.dev/examples/langchain-sql | | Steer Action Demo | Banking transfer agent showcasing observe, deny, and steer actions. | https://docs.agentcontrol.dev/examples/steer-action-demo | | Target Context | Bind controls to opaque external targets (e.g. `env=prod`) and let the SDK pin one target per session. | https://docs.agentcontrol.dev/examples/target-context | diff --git a/examples/galileo_luna/README.md b/examples/galileo_luna/README.md index b81b034f..651df181 100644 --- a/examples/galileo_luna/README.md +++ b/examples/galileo_luna/README.md @@ -1,6 +1,6 @@ # Galileo Luna Direct Evaluator Example -This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls Galileo's `/scorers/invoke` API and applies thresholds locally from the control definition. +This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls runners-api at `/api/v1/scorers/invoke` and applies thresholds locally from the control definition. ## What It Shows @@ -17,62 +17,38 @@ Start the Agent Control server from the repo root: make server-run ``` -Configure exactly one Galileo credential. - -For most OSS users, only an API key is required. This uses public API-key auth -and calls the public scorer API: - -```bash -export GALILEO_API_KEY="your-api-key" -export GALILEO_CONSOLE_URL="https://console.demo-v2.galileocloud.io" -``` - -`GALILEO_CONSOLE_URL` is optional when using the production console URL. -`GALILEO_LUNA_API_URL` is not required for this path. 
The client uses -`GALILEO_API_URL` when set, otherwise it derives the API URL from -`GALILEO_CONSOLE_URL`. - -For deployments that use service-to-service internal auth, the deployment -environment should inject the API internal secret instead of an API key: +Configure runners-api credentials: ```bash -# Set by deployment tooling, not by normal OSS users. export GALILEO_API_SECRET_KEY="your-api-secret" +export GALILEO_RUNNERS_API_URL="http://runners-api:8090" ``` -OSS users do not need to set `GALILEO_API_SECRET_KEY` manually for the public -API-key path. Deployment tooling may also set a custom scorer API endpoint and -CA bundle. Use these only when the scorer API is not reachable through the -default public API URL derivation, or when the endpoint uses a private CA: +`GALILEO_API_SECRET` can be used instead of `GALILEO_API_SECRET_KEY` if that is how your deployment exposes the internal runners-api JWT signing secret. + +Required scorer setting: ```bash -export GALILEO_LUNA_API_URL="https://api.default.svc.cluster.local:8088" -export GALILEO_LUNA_CA_FILE="/etc/ssl/internal/ca.crt" +export GALILEO_LUNA_SCORER_ID="your-scorer-uuid" ``` -`GALILEO_LUNA_API_URL` overrides the scorer API URL in either auth mode. -`GALILEO_LUNA_CA_FILE` is only needed for endpoints that are not trusted by the -system CA store. - Optional scorer settings: ```bash -export GALILEO_LUNA_SCORER_LABEL="toxicity" -# Or select by scorer id/version instead of label: -# export GALILEO_LUNA_SCORER_ID="scorer-id" -# export GALILEO_LUNA_SCORER_VERSION_ID="scorer-version-id" +export GALILEO_LUNA_SCORER_LABEL="toxicity" # display/metadata label only +export GALILEO_LUNA_SCORER_VERSION_ID="version-uuid" # pin a specific scorer version export GALILEO_LUNA_THRESHOLD="0.5" export GALILEO_LUNA_PAYLOAD_FIELD="output" ``` -`GALILEO_LUNA_PAYLOAD_FIELD` is explicit for scalar selected data. 
This example -selects the agent's drafted reply with `selector.path="output"`, so it sends that -scalar as the scorer `output` field. If a selector returns structured data with -`input` and/or `output` keys, those keys are sent directly and override -`GALILEO_LUNA_PAYLOAD_FIELD`. +`GALILEO_LUNA_PAYLOAD_FIELD` is explicit for scalar selected data. This example selects the agent's drafted reply with `selector.path="output"`, so it sends that scalar as the scorer `output` field. If a selector returns structured data with `input` and/or `output` keys, those keys are sent directly and override `GALILEO_LUNA_PAYLOAD_FIELD`. + +If the runners-api endpoint uses an internal certificate authority, configure one of: -Setting both `GALILEO_API_KEY` and `GALILEO_API_SECRET_KEY` is an error; unset -one so the auth mode can be inferred. +```bash +export GALILEO_RUNNERS_API_CA_FILE="/etc/ssl/internal/runners-api-ca.crt" +export AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE="/etc/agent-control/auth-upstream-ca/ca.crt" +``` Run: diff --git a/examples/galileo_luna/demo_agent.py b/examples/galileo_luna/demo_agent.py index 0b6a0f8a..af95ce70 100644 --- a/examples/galileo_luna/demo_agent.py +++ b/examples/galileo_luna/demo_agent.py @@ -4,7 +4,9 @@ Prerequisites: 1. Start server: make server-run 2. Create controls: uv run python setup_controls.py - 3. Set Galileo credentials where this script runs + 3. 
Set Galileo credentials where this script runs: + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET + GALILEO_RUNNERS_API_URL Usage: uv run python demo_agent.py @@ -90,29 +92,25 @@ def init_agent() -> None: async def run_demo() -> None: """Run scripted scenarios.""" - api_key = os.getenv("GALILEO_API_KEY") - api_secret = os.getenv("GALILEO_API_SECRET_KEY") - if not api_key and not api_secret: + api_secret = os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") + runners_url = os.getenv("GALILEO_RUNNERS_API_URL") + + if not api_secret: print( - "Galileo credentials are required for the galileo.luna evaluator. " - "Set GALILEO_API_KEY for public mode. Deployments using internal " - "mode should inject GALILEO_API_SECRET_KEY." + "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required for the " + "galileo.luna evaluator." ) return - if api_key and api_secret: - print( - "Both GALILEO_API_KEY and GALILEO_API_SECRET_KEY are set. " - "Unset one so the auth mode can be inferred." - ) + if not runners_url: + print("GALILEO_RUNNERS_API_URL is required for the galileo.luna evaluator.") return - auth_mode = "public" if api_key else "internal" print("=" * 72) print("Direct Galileo Luna Evaluator Demo") print("=" * 72) - print(f"Server: {SERVER_URL}") - print(f"Agent: {AGENT_NAME}") - print(f"Auth: {auth_mode}") + print(f"Server: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print(f"Runners API: {runners_url}") print() init_agent() diff --git a/examples/galileo_luna/setup_controls.py b/examples/galileo_luna/setup_controls.py index fe1434c8..b44c2dde 100644 --- a/examples/galileo_luna/setup_controls.py +++ b/examples/galileo_luna/setup_controls.py @@ -4,8 +4,9 @@ Prerequisites: - Agent Control server running at AGENT_CONTROL_URL, default http://localhost:8000 - Galileo credentials set where demo_agent.py will run: - GALILEO_API_KEY for public auth, or - deployment-injected GALILEO_API_SECRET_KEY for internal auth + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET + 
GALILEO_RUNNERS_API_URL + GALILEO_LUNA_SCORER_ID (required) Usage: uv run python setup_controls.py @@ -24,12 +25,14 @@ AGENT_DESCRIPTION = "Demo agent protected by direct Galileo Luna scorer controls" SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") -LUNA_SCORER_LABEL = os.getenv("GALILEO_LUNA_SCORER_LABEL", "toxicity") LUNA_SCORER_ID = os.getenv("GALILEO_LUNA_SCORER_ID") +LUNA_SCORER_LABEL = os.getenv("GALILEO_LUNA_SCORER_LABEL") LUNA_SCORER_VERSION_ID = os.getenv("GALILEO_LUNA_SCORER_VERSION_ID") LUNA_THRESHOLD = float(os.getenv("GALILEO_LUNA_THRESHOLD", "0.5")) LUNA_PAYLOAD_FIELD = os.getenv("GALILEO_LUNA_PAYLOAD_FIELD", "output") +if not LUNA_SCORER_ID: + raise ValueError("GALILEO_LUNA_SCORER_ID is required.") if LUNA_PAYLOAD_FIELD not in {"input", "output"}: raise ValueError("GALILEO_LUNA_PAYLOAD_FIELD must be either 'input' or 'output'.") @@ -47,14 +50,13 @@ def luna_config() -> dict[str, Any]: """Build the direct Luna evaluator config used by the composite control.""" config: dict[str, Any] = { + "scorer_id": LUNA_SCORER_ID, "threshold": LUNA_THRESHOLD, "operator": "gte", "payload_field": LUNA_PAYLOAD_FIELD, } if LUNA_SCORER_LABEL: config["scorer_label"] = LUNA_SCORER_LABEL - if LUNA_SCORER_ID: - config["scorer_id"] = LUNA_SCORER_ID if LUNA_SCORER_VERSION_ID: config["scorer_version_id"] = LUNA_SCORER_VERSION_ID return config @@ -168,13 +170,12 @@ async def setup_demo() -> None: print(f"Agent: {AGENT_NAME}") print( "Luna: " - f"scorer_label={LUNA_SCORER_LABEL!r}, " f"scorer_id={LUNA_SCORER_ID!r}, " + f"scorer_label={LUNA_SCORER_LABEL!r}, " f"scorer_version_id={LUNA_SCORER_VERSION_ID!r}, " f"threshold={LUNA_THRESHOLD}, " f"payload_field={LUNA_PAYLOAD_FIELD!r}" ) - print("Auth: inferred from the single configured Galileo credential") async with AgentControlClient(base_url=SERVER_URL, timeout=30.0) as client: await client.health_check() diff --git a/models/pyproject.toml b/models/pyproject.toml index c6d2d85a..6e340381 100644 --- 
a/models/pyproject.toml +++ b/models/pyproject.toml @@ -6,6 +6,7 @@ requires-python = ">=3.12" dependencies = [ "pydantic>=2.12.4", "jsonschema>=4.0.0", + "google-re2>=1.1", ] authors = [ {name = "Agent Control Team"} diff --git a/server/tests/test_init_agent.py b/server/tests/test_init_agent.py index fe88ce30..3f66b89d 100644 --- a/server/tests/test_init_agent.py +++ b/server/tests/test_init_agent.py @@ -67,6 +67,21 @@ def make_agent_payload( } +def _collect_paths(routes: list, prefix: str = "") -> set[str]: + """Recursively collect route paths, handling FastAPI's _IncludedRouter wrapper.""" + paths: set[str] = set() + for route in routes: + path = getattr(route, "path", None) + if path: + paths.add(prefix + path) + nested = getattr(route, "original_router", None) + if nested: + ctx = getattr(route, "include_context", None) + pfx = getattr(ctx, "prefix", "") if ctx else "" + paths |= _collect_paths(nested.routes, prefix + pfx) + return paths + + def test_init_agent_route_exists(app: FastAPI) -> None: # Given: an application router paths = _collect_route_paths(list(app.router.routes))