From 04a55c375d5380c7f5ec1b67338da12192f0ad13 Mon Sep 17 00:00:00 2001 From: abhinav-galileo Date: Tue, 16 Jun 2026 22:33:43 +0530 Subject: [PATCH 1/6] fix(server): declare plain psycopg dependency --- server/pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/pyproject.toml b/server/pyproject.toml index 4779e4fb..0661d9ac 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "pydantic>=2.5.0", "pydantic-settings>=2.1.0", "SQLAlchemy>=2.0.0", - "psycopg[binary]>=3.1", + "psycopg>=3.1", "asyncpg>=0.29.0", "greenlet>=3.0", "alembic>=1.13.0", @@ -34,6 +34,8 @@ license = {text = "Apache-2.0"} [project.optional-dependencies] galileo = ["agent-control-evaluator-galileo>=7.5.0"] +binary = ["psycopg[binary]>=3.1"] +c = ["psycopg[c]>=3.1"] [dependency-groups] dev = [ From 51898274bce5a410afcc6dce80e57fc2fd0133a4 Mon Sep 17 00:00:00 2001 From: abhinav-galileo Date: Tue, 16 Jun 2026 16:06:50 +0530 Subject: [PATCH 2/6] test(server): support included router route assertions --- server/tests/test_init_agent.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/server/tests/test_init_agent.py b/server/tests/test_init_agent.py index 2dfe9eaa..fc973118 100644 --- a/server/tests/test_init_agent.py +++ b/server/tests/test_init_agent.py @@ -16,6 +16,29 @@ engine = create_engine(db_config.get_url(), echo=False) +def _collect_route_paths(routes: list[Any], prefix: str = "") -> set[str]: + paths: set[str] = set() + for route in routes: + path = getattr(route, "path", None) + if isinstance(path, str): + paths.add(f"{prefix}{path}") + continue + + include_context = getattr(route, "include_context", None) + included_router = getattr(include_context, "included_router", None) + if included_router is None: + continue + + include_prefix = getattr(include_context, "prefix", "") + paths.update( + _collect_route_paths( + list(getattr(included_router, "routes", [])), + 
f"{prefix}{include_prefix}", + ) + ) + return paths + + def make_agent_payload( agent_name: str | None = None, name: str = "testagent0001", @@ -48,7 +71,7 @@ def make_agent_payload( def test_init_agent_route_exists(app: FastAPI) -> None: # Given: an application router - paths = {getattr(route, "path", None) for route in app.router.routes} + paths = _collect_route_paths(list(app.router.routes)) # When: inspecting registered paths # (computation done above to gather all paths) # Then: initAgent and agent retrieval endpoints are present From 8c7c83d3741ffd0e7cee98df8487bcfab23fcc65 Mon Sep 17 00:00:00 2001 From: Namrata Ghadi Date: Tue, 16 Jun 2026 12:48:23 -0700 Subject: [PATCH 3/6] new api from runners-api --- README.md | 47 ++- evaluators/contrib/galileo/README.md | 8 +- .../luna/client.py | 221 ++++-------- .../luna/config.py | 25 +- .../luna/evaluator.py | 46 ++- .../galileo/tests/test_luna_coverage_gaps.py | 329 ++++++++---------- .../galileo/tests/test_luna_evaluator.py | 267 ++++++-------- examples/README.md | 2 +- examples/galileo_luna/README.md | 25 +- examples/galileo_luna/demo_agent.py | 37 +- examples/galileo_luna/setup_controls.py | 16 +- models/pyproject.toml | 1 + server/tests/test_init_agent.py | 21 +- 13 files changed, 442 insertions(+), 603 deletions(-) diff --git a/README.md b/README.md index 8acf30cf..3ca80c65 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Enforce runtime guardrails through a centralized control layer—configure once ## Quick Start -Prerequisites: Docker and Python 3.12+. +Prerequisites: Docker (or Podman, see [Podman setup](#podman-setup)) and Python 3.12+. Quick start flow: @@ -292,6 +292,51 @@ Explore working examples for popular frameworks. 
- [AWS Strands](examples/strands_agents/) - protect Strands workflows and tool calls - [Google ADK Decorator](examples/google_adk_decorator/) - add controls with `@control()` +## Podman Setup + +If Docker Desktop is not available, you can use [Podman](https://podman-desktop.io) as a drop-in replacement. No changes to repo files are needed — the setup below makes `docker` and `docker compose` transparently resolve to Podman. + +**One-time setup:** + +1. Install [Podman Desktop](https://podman-desktop.io) and create a machine from its UI (start it before continuing). + +2. Install `podman-compose`: + +```bash +brew install podman-compose +``` + +3. Create a `docker` shim that routes `docker compose` to `podman-compose` and everything else to `podman`: + +```bash +mkdir -p ~/.local/bin +cat > ~/.local/bin/docker << 'EOF' +#!/bin/zsh +if [[ "$1" == "compose" ]]; then + shift + exec podman-compose "$@" +fi +exec podman "$@" +EOF +chmod +x ~/.local/bin/docker +``` + +4. Add `~/.local/bin` early in your PATH (if not already): + +```bash +echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc +source ~/.zshrc +``` + +**Verify:** + +```bash +docker ps +docker compose version +``` + +After this, all existing `docker`/`docker compose` commands and `make` targets work as-is. + ## How It Works ![Agent Control Architecture](docs/images/Architecture.png) diff --git a/evaluators/contrib/galileo/README.md b/evaluators/contrib/galileo/README.md index f8461f2a..038da67a 100644 --- a/evaluators/contrib/galileo/README.md +++ b/evaluators/contrib/galileo/README.md @@ -6,9 +6,11 @@ Integration package for Galileo Luna evaluator. The `galileo.luna2` evaluator ID has been removed. Existing controls that use `galileo.luna2` should migrate to `galileo.luna` and update their evaluator -configuration to the direct Luna scorer fields (`scorer_label`, `scorer_id`, or -`scorer_version_id`, plus `threshold` and `operator`). 
If you still need the -legacy Luna2 evaluator, pin `agent-control-evaluator-galileo <8`. +configuration to use the direct Luna scorer fields. `scorer_id` is required; +`scorer_label` and `scorer_version_id` are optional. The evaluator calls +runners-api at `/api/v1/scorers/invoke`. Also set `threshold` and `operator` +as needed. If you still need the legacy Luna2 evaluator, pin +`agent-control-evaluator-galileo <8`. ## Install diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 11e4881a..6a2942fe 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -1,4 +1,4 @@ -"""Direct HTTP client for Galileo Luna scorer invocation.""" +"""Direct HTTP client for Galileo Luna scorer invocation via runners-api.""" from __future__ import annotations @@ -9,8 +9,6 @@ from hmac import new as hmac_new from json import dumps from time import time -from typing import Literal -from urllib.parse import urlsplit, urlunsplit import httpx from agent_control_models import JSONObject, JSONValue @@ -20,9 +18,11 @@ DEFAULT_TIMEOUT_SECS = 10.0 DEFAULT_INTERNAL_TOKEN_TTL_SECS = 3600 -PUBLIC_SCORER_INVOKE_PATH = "/scorers/invoke" -INTERNAL_SCORER_INVOKE_PATH = "/internal/scorers/invoke" -AuthMode = Literal["public", "internal"] +RUNNERS_SCORER_INVOKE_PATH = "/api/v1/scorers/invoke" +RUNNERS_API_URL_ENV = "GALILEO_RUNNERS_API_URL" + +# Headers that must never be forwarded to runners-api (checked case-insensitively). 
+_BLOCKED_REQUEST_HEADERS = frozenset({"galileo-api-key"}) def _b64url(data: bytes) -> str: @@ -33,7 +33,7 @@ def _internal_auth_token( api_secret: str, ttl_seconds: int = DEFAULT_INTERNAL_TOKEN_TTL_SECS, ) -> str: - """Create the internal JWT expected by Galileo API internal routes.""" + """Create the internal JWT expected by runners-api scorer invoke routes.""" now = int(time()) header = {"alg": "HS256", "typ": "JWT"} payload = { @@ -52,18 +52,6 @@ def _internal_auth_token( return f"{signing_input}.{_b64url(signature)}" -def _env_auth_mode() -> AuthMode | None: - value = os.getenv("GALILEO_LUNA_AUTH_MODE") - if value is None or value.strip() == "": - return None - normalized = value.strip().lower() - if normalized == "public": - return "public" - if normalized == "internal": - return "internal" - raise ValueError("GALILEO_LUNA_AUTH_MODE must be either 'public' or 'internal'.") - - def _as_float_or_none(value: JSONValue) -> float | None: if isinstance(value, bool) or value is None: return None @@ -88,7 +76,7 @@ def _has_value(value: JSONValue) -> bool: class ScorerInvokeInputs(BaseModel): - """Input values sent to Galileo's scorer invoke API.""" + """Input values sent to the runners-api scorer invoke endpoint.""" query: JSONValue = "" response: JSONValue = "" @@ -97,39 +85,35 @@ class ScorerInvokeInputs(BaseModel): class ScorerInvokeRequest(BaseModel): - """Request payload for Galileo Luna scorer invocation. + """Request payload for runners-api scorer invocation. Attributes: + scorer_id: Required scorer identifier. + scorer_version_id: Optional pinned scorer version identifier. + scorer_label: Optional display/metadata label. inputs: Selected scorer input values. - scorer_label: Preset, registered, or fine-tuned scorer label. - scorer_id: Optional Galileo scorer identifier. - scorer_version_id: Optional Galileo scorer version identifier. config: Optional scorer-specific configuration. 
""" - inputs: ScorerInvokeInputs - scorer_label: str | None = Field(default=None, min_length=1) - scorer_id: str | None = Field(default=None, min_length=1) + scorer_id: str = Field(min_length=1) scorer_version_id: str | None = Field(default=None, min_length=1) - config: JSONObject | None = None + scorer_label: str | None = Field(default=None, min_length=1) + inputs: ScorerInvokeInputs + config: JSONObject = Field(default_factory=dict) @model_validator(mode="after") def ensure_required_values(self) -> ScorerInvokeRequest: - if not (self.scorer_label or self.scorer_id or self.scorer_version_id): - raise ValueError( - "One of scorer_label, scorer_id, or scorer_version_id must be set." - ) if not (_has_value(self.inputs.query) or _has_value(self.inputs.response)): raise ValueError("Either inputs.query or inputs.response must be set.") return self def to_dict(self) -> JSONObject: - """Convert to the Galileo scorer invoke API request shape.""" + """Convert to the runners-api scorer invoke request shape.""" return self.model_dump(mode="json", exclude_none=True) class ScorerInvokeResponse(BaseModel): - """Response from Galileo Luna scorer invocation. + """Response from runners-api scorer invocation. Attributes: scorer_label: Echoed scorer label, when returned. @@ -152,7 +136,7 @@ def raw_response(self) -> JSONObject: @classmethod def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: - """Create a response model from the API JSON object.""" + """Create a response model from the runners-api JSON object.""" response = cls.model_validate( data | {"execution_time": _as_float_or_none(data.get("execution_time"))} ) @@ -161,166 +145,82 @@ def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: class GalileoLunaClient: - """Thin HTTP client for Galileo Luna direct scorer invocation. + """Thin HTTP client for Galileo Luna scorer invocation via runners-api. Environment Variables: - GALILEO_API_SECRET_KEY or GALILEO_API_SECRET: Galileo API internal JWT signing secret. 
- GALILEO_API_KEY: Galileo API key fallback for public scorer invocation. - GALILEO_LUNA_AUTH_MODE: Auth mode, either "public" or "internal". - GALILEO_CONSOLE_URL: Galileo Console URL (optional, defaults to production). + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET: JWT signing secret for runners-api auth. + GALILEO_RUNNERS_API_URL: runners-api base URL (required). """ def __init__( self, - api_key: str | None = None, api_secret: str | None = None, - console_url: str | None = None, - api_url: str | None = None, - auth_mode: AuthMode | None = None, + runners_api_url: str | None = None, ) -> None: """Initialize the Galileo Luna client. Args: - api_key: Galileo API key. If not provided, reads from GALILEO_API_KEY. - api_secret: Galileo API secret for internal JWT auth. If not provided, - reads from GALILEO_API_SECRET_KEY or GALILEO_API_SECRET. - console_url: Galileo Console URL. If not provided, reads from - GALILEO_CONSOLE_URL or uses the production console URL. - api_url: Galileo API URL. If not provided, reads from GALILEO_API_URL - before deriving from the console URL. - auth_mode: Auth mode to use. If not provided, reads from - GALILEO_LUNA_AUTH_MODE, or infers from the single available credential. + api_secret: Internal JWT signing secret. If not provided, reads from + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET. + runners_api_url: runners-api base URL. If not provided, reads from + GALILEO_RUNNERS_API_URL. Raises: - ValueError: If credentials are missing, ambiguous, or incompatible with - the selected auth mode. + ValueError: If the API secret or runners-api URL is not configured. 
""" resolved_api_secret = ( api_secret or os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") ) - resolved_api_key = api_key or os.getenv("GALILEO_API_KEY") - resolved_auth_mode = self._resolve_auth_mode( - auth_mode or _env_auth_mode(), - api_key=resolved_api_key, - api_secret=resolved_api_secret, - ) - - self.api_key = resolved_api_key - self.api_secret = resolved_api_secret - self.auth_mode = resolved_auth_mode - self.console_url = ( - console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" - ) - self.api_base = (api_url or os.getenv("GALILEO_API_URL") or "").rstrip( - "/" - ) or self._derive_api_url(self.console_url) - self._client: httpx.AsyncClient | None = None - logger.info("[GalileoLunaClient] Auth mode selected: %s", self.auth_mode) - - @staticmethod - def _resolve_auth_mode( - auth_mode: AuthMode | None, - *, - api_key: str | None, - api_secret: str | None, - ) -> AuthMode: - if auth_mode == "public": - if not api_key: - raise ValueError( - "GALILEO_API_KEY is required when GALILEO_LUNA_AUTH_MODE=public." - ) - return "public" - - if auth_mode == "internal": - if not api_secret: - raise ValueError( - "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required when " - "GALILEO_LUNA_AUTH_MODE=internal." - ) - return "internal" - - if api_key and api_secret: + if not resolved_api_secret: raise ValueError( - "Both Galileo API key and API secret are configured. Set " - "GALILEO_LUNA_AUTH_MODE to 'public' or 'internal' to choose the " - "runtime auth mode explicitly." + "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required for Luna " + "runners-api invocation. Set one as an environment variable or pass it " + "to the constructor." ) - if api_secret: - return "internal" - if api_key: - return "public" - raise ValueError( - "GALILEO_API_SECRET_KEY or GALILEO_API_KEY is required. " - "Set one as an environment variable or pass it to the constructor." 
- ) - def _derive_api_url(self, console_url: str) -> str: - """Derive the API URL from a Galileo Console URL. + resolved_runners_url = runners_api_url or os.getenv(RUNNERS_API_URL_ENV) + if not resolved_runners_url: + raise ValueError( + "GALILEO_RUNNERS_API_URL is required for Luna runners-api invocation. " + "Set it as an environment variable or pass it to the constructor." + ) - Galileo Console hostnames use ``console.`` or ``console-`` prefixes for - canonical environments. For other HTTP(S) hosts, preserve the existing - fallback behavior of prefixing the hostname with ``api.``. - """ - url = console_url.rstrip("/") - parts = urlsplit(url) - host = parts.hostname or "" - - if host.startswith("console."): - new_host = "api." + host[len("console."):] - elif host.startswith("console-"): - new_host = "api-" + host[len("console-"):] - elif parts.scheme in {"http", "https"} and host: - new_host = f"api.{host}" - else: - return url - - return urlunsplit( - parts._replace(netloc=parts.netloc.replace(host, new_host, 1)) - ) + self.api_secret = resolved_api_secret + self.runners_api_base = resolved_runners_url.rstrip("/") + self._client: httpx.AsyncClient | None = None async def _get_client(self) -> httpx.AsyncClient: """Get or create the HTTP client.""" if self._client is None or self._client.is_closed: - headers = {"Content-Type": "application/json"} - if self.auth_mode == "public" and self.api_key is not None: - headers["Galileo-API-Key"] = self.api_key self._client = httpx.AsyncClient( - headers=headers, + headers={"Content-Type": "application/json"}, timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECS), ) return self._client - def _endpoint_and_headers( - self, - headers: dict[str, str] | None, - ) -> tuple[str, dict[str, str]]: - request_headers = dict(headers or {}) - if self.auth_mode == "public": - return f"{self.api_base}{PUBLIC_SCORER_INVOKE_PATH}", request_headers - - if self.api_secret is None: - raise RuntimeError("Internal Luna auth mode is missing an API 
secret.") - request_headers["Authorization"] = f"Bearer {_internal_auth_token(self.api_secret)}" - return f"{self.api_base}{INTERNAL_SCORER_INVOKE_PATH}", request_headers + def _endpoint_and_auth_header(self) -> tuple[str, str]: + token = _internal_auth_token(self.api_secret) + endpoint = f"{self.runners_api_base}{RUNNERS_SCORER_INVOKE_PATH}" + return endpoint, f"Bearer {token}" async def invoke( self, *, - scorer_label: str | None = None, - scorer_id: str | None = None, + scorer_id: str, scorer_version_id: str | None = None, + scorer_label: str | None = None, input: JSONValue = None, output: JSONValue = None, config: JSONObject | None = None, timeout: float = DEFAULT_TIMEOUT_SECS, headers: dict[str, str] | None = None, ) -> ScorerInvokeResponse: - """Invoke a Galileo Luna scorer. + """Invoke a Galileo Luna scorer via runners-api. Args: - scorer_label: Preset, registered, or fine-tuned scorer label. - scorer_id: Optional Galileo scorer identifier. - scorer_version_id: Optional Galileo scorer version identifier. + scorer_id: Required scorer identifier. + scorer_version_id: Optional pinned scorer version identifier. + scorer_label: Optional display/metadata label. input: Optional user/system prompt text. output: Optional model response text. config: Optional scorer-specific configuration. @@ -333,24 +233,29 @@ async def invoke( Raises: ValueError: If neither input nor output is provided. RuntimeError: If the API response is not a JSON object. - httpx.HTTPStatusError: If the API returns an error status code. + httpx.HTTPStatusError: If runners-api returns an error status code. httpx.RequestError: If the request fails before a response is received. 
""" - if not (scorer_label or scorer_id or scorer_version_id): - raise ValueError("At least one scorer identifier must be provided.") if not (_has_value(input) or _has_value(output)): raise ValueError("At least one of input or output must be provided.") request_body = ScorerInvokeRequest( - scorer_label=scorer_label, scorer_id=scorer_id, scorer_version_id=scorer_version_id, + scorer_label=scorer_label, inputs=ScorerInvokeInputs( query="" if input is None else input, response="" if output is None else output ), - config=config, + config=config if config is not None else {}, ).to_dict() - endpoint, request_headers = self._endpoint_and_headers(headers) + + endpoint, auth_header = self._endpoint_and_auth_header() + request_headers = { + k: v + for k, v in (headers or {}).items() + if k.lower() not in _BLOCKED_REQUEST_HEADERS + } + request_headers["Authorization"] = auth_header logger.debug("[GalileoLunaClient] POST %s", endpoint) logger.debug("[GalileoLunaClient] Request body: %s", request_body) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py index 788fa24c..bb8a9804 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -32,9 +32,9 @@ class LunaEvaluatorConfig(EvaluatorConfig): """Configuration for direct Luna scorer evaluation. Attributes: - scorer_label: Preset, registered, or fine-tuned scorer label. - scorer_id: Optional Galileo scorer identifier. - scorer_version_id: Optional Galileo scorer version identifier. + scorer_id: Required scorer identifier for runners-api invocation. + scorer_version_id: Optional pinned scorer version identifier. + scorer_label: Optional display/metadata label. threshold: Local threshold used by the evaluator for comparison. operator: Local comparison operator. 
Numeric operators use threshold as a number. scorer_config: Optional scorer-specific config sent as ``config``. @@ -42,20 +42,19 @@ class LunaEvaluatorConfig(EvaluatorConfig): timeout_ms: Request timeout in milliseconds. """ - scorer_label: str | None = Field( - default=None, + scorer_id: str = Field( min_length=1, - description="Luna scorer label to invoke.", + description="Required scorer identifier for runners-api invocation.", ) - scorer_id: str | None = Field( + scorer_version_id: str | None = Field( default=None, min_length=1, - description="Optional Galileo scorer identifier to invoke.", + description="Optional pinned scorer version identifier.", ) - scorer_version_id: str | None = Field( + scorer_label: str | None = Field( default=None, min_length=1, - description="Optional Galileo scorer version identifier to invoke.", + description="Optional display/metadata label.", ) threshold: JSONValue = Field( default=0.5, @@ -69,7 +68,7 @@ class LunaEvaluatorConfig(EvaluatorConfig): default=None, alias="config", serialization_alias="config", - description="Optional scorer-specific configuration sent to Galileo.", + description="Optional scorer-specific configuration sent to runners-api.", ) payload_field: LunaPayloadField = Field( default="input", @@ -88,10 +87,6 @@ class LunaEvaluatorConfig(EvaluatorConfig): @model_validator(mode="after") def validate_threshold(self) -> LunaEvaluatorConfig: """Validate threshold compatibility with the configured operator.""" - if not (self.scorer_label or self.scorer_id or self.scorer_version_id): - raise ValueError( - "one of scorer_label, scorer_id, or scorer_version_id is required" - ) if self.operator in _NUMERIC_OPERATORS and coerce_number(self.threshold) is None: raise ValueError(f"operator '{self.operator}' requires a numeric threshold") if self.operator != "any" and self.threshold is None: diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py 
b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index eff92f2a..f204b21d 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -99,17 +99,13 @@ def __init__(self, config: LunaEvaluatorConfig) -> None: config: Validated LunaEvaluatorConfig instance. Raises: - ValueError: If neither GALILEO_API_SECRET_KEY nor GALILEO_API_KEY is set. + ValueError: If neither GALILEO_API_SECRET_KEY nor GALILEO_API_SECRET is set. """ - has_auth = ( - os.getenv("GALILEO_API_SECRET_KEY") - or os.getenv("GALILEO_API_SECRET") - or os.getenv("GALILEO_API_KEY") - ) - if not has_auth: + has_secret = os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") + if not has_secret: raise ValueError( - "GALILEO_API_SECRET_KEY or GALILEO_API_KEY environment variable must be set. " - "Set an API secret for internal auth or a Galileo API key before using " + "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required for Luna " + "runners-api invocation. Set one as an environment variable before using " "galileo.luna." 
) @@ -216,28 +212,30 @@ async def evaluate(self, data: Any) -> EvaluatorResult: return self._handle_error(exc) def _base_metadata(self) -> dict[str, Any]: - metadata = { - "scorer_label": self.config.scorer_label, - "scorer_id": self.config.scorer_id, - "scorer_version_id": self.config.scorer_version_id, - } - return {key: value for key, value in metadata.items() if value is not None} + metadata: dict[str, Any] = {"scorer_id": self.config.scorer_id} + if self.config.scorer_version_id is not None: + metadata["scorer_version_id"] = self.config.scorer_version_id + if self.config.scorer_label is not None: + metadata["scorer_label"] = self.config.scorer_label + return metadata def _scorer_kwargs(self) -> dict[str, Any]: - kwargs = { - "scorer_label": self.config.scorer_label, - "scorer_id": self.config.scorer_id, - "scorer_version_id": self.config.scorer_version_id, - } - return {key: value for key, value in kwargs.items() if value is not None} + kwargs: dict[str, Any] = {"scorer_id": self.config.scorer_id} + if self.config.scorer_version_id is not None: + kwargs["scorer_version_id"] = self.config.scorer_version_id + if self.config.scorer_label is not None: + kwargs["scorer_label"] = self.config.scorer_label + return kwargs def _metadata( self, response: ScorerInvokeResponse, ) -> dict[str, Any]: metadata: dict[str, Any] = self._base_metadata() + echoed_label = response.scorer_label or self.config.scorer_label + if echoed_label is not None: + metadata["scorer_label"] = echoed_label metadata.update({ - "scorer_label": response.scorer_label or self.config.scorer_label, "score": response.score, "threshold": self.config.threshold, "operator": self.config.operator, @@ -257,10 +255,8 @@ def _handle_error( confidence=0.0, message=f"Luna evaluation error: {error_detail}", metadata={ + **self._base_metadata(), "error_type": type(error).__name__, - "scorer_label": self.config.scorer_label, - "scorer_id": self.config.scorer_id, - "scorer_version_id": self.config.scorer_version_id, 
}, error=error_detail, ) diff --git a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py b/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py index 9d5f6766..cfbccdc4 100644 --- a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py +++ b/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py @@ -8,11 +8,23 @@ import json import os +from base64 import urlsafe_b64decode from unittest.mock import AsyncMock, MagicMock, patch import httpx import pytest +RUNNERS_ENV = { + "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", +} + + +def _decode_jwt_payload(token: str) -> dict[str, object]: + payload_segment = token.split(".")[1] + padded = payload_segment + ("=" * (-len(payload_segment) % 4)) + return json.loads(urlsafe_b64decode(padded.encode()).decode()) + # ============================================================================= # luna/evaluator.py: utility helpers @@ -52,8 +64,6 @@ class CannotJson: def __repr__(self): return "" - # json.dumps with default=str would actually serialize this, so use - # something that breaks both the JSON pass AND triggers TypeError. cannot = CannotJson() result = _coerce_payload_text({"obj": cannot}) @@ -108,7 +118,6 @@ def test_dict_threshold_matches_value(self): def test_other_types_return_false(self): from agent_control_evaluator_galileo.luna.evaluator import _contains - # Non-iterable score => no match. 
assert _contains(42, 42) is False @@ -133,7 +142,6 @@ def test_in_range_number_returned_as_is(self): def test_out_of_range_falls_back_to_one(self): from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score - # Above 1.0 → fall back to default confidence assert _confidence_from_score(7.2) == 1.0 def test_non_numeric_falls_back_to_one(self): @@ -150,11 +158,12 @@ def test_non_numeric_falls_back_to_one(self): @pytest.fixture def luna_evaluator(monkeypatch): """A ready-to-use LunaEvaluator instance with auth env wired up.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator return LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.5, "operator": "gte"} + {"scorer_id": "scorer-123", "threshold": 0.5, "operator": "gte"} ) @@ -162,15 +171,12 @@ class TestScoreMatchesOperators: """Every operator branch in ``_score_matches`` should evaluate.""" def _make(self, operator, threshold, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator - if operator in {"eq", "ne", "contains"}: - threshold_value = threshold - else: - threshold_value = threshold return LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": threshold_value, "operator": operator} + {"scorer_id": "scorer-123", "threshold": threshold, "operator": operator} ) def test_any_truthy_score_matches(self, monkeypatch): @@ -218,11 +224,12 @@ def test_numeric_operator_rejects_non_numeric_score(self, monkeypatch): class TestPreparePayload: """``_prepare_payload`` routes scalar data using explicit config.""" - def test_scalar_routed_to_input_when_label_lacks_output(self, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + def test_scalar_routed_to_input_by_default(self, 
monkeypatch): + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) input_text, output_text = evaluator._prepare_payload("hello") @@ -230,15 +237,12 @@ def test_scalar_routed_to_input_when_label_lacks_output(self, monkeypatch): assert output_text is None def test_scalar_routed_to_output_when_payload_field_is_output(self, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator evaluator = LunaEvaluator.from_dict( - { - "scorer_label": "toxicity", - "threshold": 0.5, - "payload_field": "output", - } + {"scorer_id": "scorer-123", "threshold": 0.5, "payload_field": "output"} ) input_text, output_text = evaluator._prepare_payload("hello") @@ -246,32 +250,13 @@ def test_scalar_routed_to_output_when_payload_field_is_output(self, monkeypatch) assert input_text is None assert output_text == "hello" - def test_scalar_output_label_without_payload_field_still_defaults_to_input( - self, - monkeypatch, - ): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator - - evaluator = LunaEvaluator.from_dict( - {"scorer_label": "output_correctness", "threshold": 0.5} - ) - - input_text, output_text = evaluator._prepare_payload("hello") - - assert input_text == "hello" - assert output_text is None - def test_structured_payload_uses_input_output_keys_over_payload_field(self, monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator evaluator = LunaEvaluator.from_dict( - { - "scorer_label": 
"toxicity", - "threshold": 0.5, - "payload_field": "output", - } + {"scorer_id": "scorer-123", "threshold": 0.5, "payload_field": "output"} ) input_text, output_text = evaluator._prepare_payload( @@ -285,10 +270,11 @@ def test_structured_payload_uses_input_output_keys_over_payload_field(self, monk @pytest.mark.asyncio async def test_evaluator_aclose_closes_underlying_client(monkeypatch): """``aclose`` must release the eagerly-created client without clearing it.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) fake = MagicMock() fake.close = AsyncMock() @@ -303,12 +289,13 @@ async def test_evaluator_aclose_closes_underlying_client(monkeypatch): @pytest.mark.asyncio async def test_evaluator_handles_non_success_status(monkeypatch): """A non-success status from the scorer must surface as an error result.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse from agent_control_evaluator_galileo.luna.client import GalileoLunaClient evaluator = LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.5, "operator": "gte"} + {"scorer_id": "scorer-123", "threshold": 0.5, "operator": "gte"} ) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: @@ -417,184 +404,98 @@ def test_scalar_other_types_have_value(self): from agent_control_evaluator_galileo.luna.client import _has_value assert _has_value(42) is True - assert _has_value(0) is True # 0 is a real value, not empty + assert _has_value(0) is True assert _has_value(True) is True class 
TestScorerInvokeRequestValidation: """``ScorerInvokeRequest`` rejects malformed input combos.""" - def test_missing_all_identifiers_raises(self): + def test_missing_scorer_id_raises(self): from agent_control_evaluator_galileo.luna.client import ( ScorerInvokeInputs, ScorerInvokeRequest, ) from pydantic import ValidationError - with pytest.raises(ValidationError, match="One of scorer_label"): + with pytest.raises(ValidationError, match="scorer_id"): ScorerInvokeRequest(inputs=ScorerInvokeInputs(query="hello")) -def test_client_raises_when_no_credentials(monkeypatch): - """The client requires at least an API secret or an API key.""" - for name in ( - "GALILEO_API_SECRET_KEY", - "GALILEO_API_SECRET", - "GALILEO_API_KEY", - "GALILEO_LUNA_AUTH_MODE", - ): +def test_client_raises_when_no_api_secret(monkeypatch): + """The client requires GALILEO_API_SECRET_KEY or GALILEO_API_SECRET.""" + for name in ("GALILEO_API_SECRET_KEY", "GALILEO_API_SECRET"): monkeypatch.delenv(name, raising=False) + monkeypatch.setenv("GALILEO_RUNNERS_API_URL", "http://runners-api:8090") from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY"): + with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_SECRET"): GalileoLunaClient() -def test_client_requires_explicit_mode_when_both_credentials_are_present(monkeypatch): - """A mixed credential environment must not silently choose an auth route.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") - monkeypatch.delenv("GALILEO_LUNA_AUTH_MODE", raising=False) +def test_client_raises_when_no_runners_api_url(monkeypatch): + """The client requires GALILEO_RUNNERS_API_URL.""" + monkeypatch.setenv("GALILEO_API_SECRET_KEY", "test-secret") + monkeypatch.delenv("GALILEO_RUNNERS_API_URL", raising=False) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with 
pytest.raises(ValueError, match="Both Galileo API key and API secret"): + with pytest.raises(ValueError, match="GALILEO_RUNNERS_API_URL"): GalileoLunaClient() -def test_client_uses_explicit_public_mode_when_both_credentials_are_present(monkeypatch): - """Explicit public mode should use the API-key route even if a secret is also set.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "public") +def test_client_jwt_has_internal_scope(monkeypatch): + """JWT produced by the client must carry internal=True and scope=scorers.invoke.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() + _, auth_header = client._endpoint_and_auth_header() - assert client.auth_mode == "public" - endpoint, request_headers = client._endpoint_and_headers(None) - assert endpoint.endswith("/scorers/invoke") - assert "Authorization" not in request_headers + assert auth_header.startswith("Bearer ") + payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) + assert payload["internal"] is True + assert payload["scope"] == "scorers.invoke" -def test_client_uses_explicit_internal_mode_when_both_credentials_are_present(monkeypatch): - """Explicit internal mode should use the internal JWT route.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "internal") +def test_client_posts_to_correct_runners_api_endpoint(monkeypatch): + """_endpoint_and_auth_header must return the runners-api /api/v1/scorers/invoke path.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() + endpoint, _ = 
client._endpoint_and_auth_header() - assert client.auth_mode == "internal" - endpoint, request_headers = client._endpoint_and_headers(None) - assert endpoint.endswith("/internal/scorers/invoke") - assert request_headers["Authorization"].startswith("Bearer ") - - -def test_client_rejects_mode_without_matching_credential(monkeypatch): - """The selected mode must have its matching credential configured.""" - monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) - monkeypatch.delenv("GALILEO_API_SECRET", raising=False) - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "internal") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - - with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY"): - GalileoLunaClient() + assert endpoint == "http://runners-api:8090/api/v1/scorers/invoke" -def test_client_rejects_invalid_auth_mode(monkeypatch): - """Invalid auth mode values should fail during client initialization.""" - monkeypatch.setenv("GALILEO_API_KEY", "public-key") - monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "sideways") +def test_client_does_not_use_old_api_paths(monkeypatch): + """The endpoint must sit under /api/v1/ and never use /internal/scorers/invoke.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - with pytest.raises(ValueError, match="GALILEO_LUNA_AUTH_MODE"): - GalileoLunaClient() - - -class TestDeriveApiUrl: - """URL derivation covers every console.* → api.* substitution branch.""" - - def _client(self, monkeypatch): - monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) - monkeypatch.delenv("GALILEO_API_SECRET", raising=False) - monkeypatch.delenv("GALILEO_LUNA_AUTH_MODE", raising=False) - monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - - return GalileoLunaClient() - - def
test_console_dot_rewritten_to_api_dot(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://console.galileo.ai") - == "https://api.galileo.ai" - ) - - def test_console_dash_rewritten_to_api_dash(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://console-staging.galileo.ai") - == "https://api-staging.galileo.ai" - ) - - def test_plain_https_host_gets_api_prefix(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://example.com") - == "https://api.example.com" - ) - - def test_non_prefix_console_substring_gets_api_prefix(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://my-console.example.com") - == "https://api.my-console.example.com" - ) - - def test_console_substring_in_path_does_not_rewrite_path(self, monkeypatch): - client = self._client(monkeypatch) - assert ( - client._derive_api_url("https://app.galileo.ai/console.html") - == "https://api.app.galileo.ai/console.html" - ) - - def test_plain_http_host_gets_api_prefix(self, monkeypatch): - client = self._client(monkeypatch) - assert client._derive_api_url("http://example.com") == "http://api.example.com" + client = GalileoLunaClient() + endpoint, _ = client._endpoint_and_auth_header() - def test_unknown_scheme_returned_as_is(self, monkeypatch): - client = self._client(monkeypatch) - # No console./console- prefix, no http(s) scheme → return unchanged. 
- assert client._derive_api_url("api.example.com") == "api.example.com" + assert "/scorers/invoke" in endpoint + assert endpoint.startswith("http://runners-api:8090/api/v1/") + assert "/internal/scorers/invoke" not in endpoint @pytest.mark.asyncio -async def test_get_client_adds_api_key_header_when_no_secret(monkeypatch): - """When only an API key is configured, the public-API header is set.""" - monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) - monkeypatch.delenv("GALILEO_API_SECRET", raising=False) - monkeypatch.setenv("GALILEO_API_KEY", "public-key") +async def test_get_client_does_not_set_galileo_api_key_header(monkeypatch): + """The HTTP client must never include a Galileo-API-Key header.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() http_client = await client._get_client() try: - assert http_client.headers.get("Galileo-API-Key") == "public-key" - finally: - await client.close() - - -@pytest.mark.asyncio -async def test_invoke_rejects_missing_scorer_identifier(monkeypatch): - monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - - client = GalileoLunaClient() - try: - with pytest.raises(ValueError, match="At least one scorer identifier"): - await client.invoke(input="hello") + assert "Galileo-API-Key" not in http_client.headers + assert "galileo-api-key" not in http_client.headers finally: await client.close() @@ -602,7 +503,8 @@ async def test_invoke_rejects_missing_scorer_identifier(monkeypatch): @pytest.mark.asyncio async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): """A non-object JSON body must surface as a clear RuntimeError.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import 
GalileoLunaClient client = GalileoLunaClient() @@ -618,7 +520,7 @@ async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): try: with pytest.raises(RuntimeError, match="not a JSON object"): - await client.invoke(scorer_label="toxicity", input="hello") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() @@ -626,7 +528,8 @@ async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): @pytest.mark.asyncio async def test_invoke_propagates_http_status_error(monkeypatch): """The client logs and re-raises HTTP status errors.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -647,7 +550,7 @@ async def test_invoke_propagates_http_status_error(monkeypatch): try: with pytest.raises(httpx.HTTPStatusError): - await client.invoke(scorer_label="toxicity", input="hello") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() @@ -655,7 +558,8 @@ async def test_invoke_propagates_http_status_error(monkeypatch): @pytest.mark.asyncio async def test_invoke_propagates_request_error(monkeypatch): """RequestError is logged and re-raised so callers can decide policy.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -667,7 +571,7 @@ async def test_invoke_propagates_request_error(monkeypatch): try: with pytest.raises(httpx.RequestError): - await client.invoke(scorer_label="toxicity", input="hello") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() @@ -675,13 +579,68 @@ async def test_invoke_propagates_request_error(monkeypatch): @pytest.mark.asyncio async def 
test_client_async_context_manager_closes_on_exit(monkeypatch): """Entering/exiting the async context manager must close the client.""" - monkeypatch.setenv("GALILEO_API_KEY", "test-key") + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) from agent_control_evaluator_galileo.luna.client import GalileoLunaClient async with GalileoLunaClient() as client: - # Trigger lazy client creation so close() has work to do. await client._get_client() assert client._client is not None - # __aexit__ closes the underlying httpx client. assert client._client is None + + +@pytest.mark.asyncio +async def test_invoke_strips_caller_supplied_galileo_api_key_header(monkeypatch): + """Regression: a Galileo-API-Key passed via the headers kwarg must be stripped.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["headers"] = dict(request.headers) + return httpx.Response(200, json={"score": 0.9, "status": "success"}) + + client = GalileoLunaClient() + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + + try: + await client.invoke( + scorer_id="scorer-123", + input="hello", + headers={"Galileo-API-Key": "should-be-stripped", "X-Custom": "keep-me"}, + ) + finally: + await client.close() + + headers = captured["headers"] + assert isinstance(headers, dict) + assert "galileo-api-key" not in headers + assert headers.get("x-custom") == "keep-me" + + +@pytest.mark.asyncio +async def test_invoke_always_emits_config_field(monkeypatch): + """Regression: config must always be present in the request body, defaulting to {}.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> 
httpx.Response: + captured["body"] = json.loads(request.content.decode()) + return httpx.Response(200, json={"score": 0.5, "status": "success"}) + + client = GalileoLunaClient() + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + + try: + await client.invoke(scorer_id="scorer-123", input="hello") + finally: + await client.close() + + assert "config" in captured["body"] + assert captured["body"]["config"] == {} diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index f123e214..78d21e68 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -12,6 +12,11 @@ from agent_control_models import EvaluatorResult from pydantic import ValidationError +RUNNERS_ENV = { + "GALILEO_API_SECRET_KEY": "test-secret", + "GALILEO_RUNNERS_API_URL": "http://runners-api:8090", +} + def _decode_jwt_payload(token: str) -> dict[str, object]: payload_segment = token.split(".")[1] @@ -22,23 +27,21 @@ def _decode_jwt_payload(token: str) -> dict[str, object]: class TestLunaEvaluatorConfig: """Tests for direct Luna evaluator configuration.""" - def test_config_accepts_direct_scorer_fields(self) -> None: + def test_config_accepts_scorer_id_with_all_optional_fields(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig - # Given: a direct scorer config with local thresholding config = LunaEvaluatorConfig( - scorer_label="toxicity", scorer_id="scorer-123", scorer_version_id="version-123", + scorer_label="toxicity", threshold=0.7, operator="gte", config={"temperature": 0}, ) - # Then: config is retained without Protect concepts - assert config.scorer_label == "toxicity" assert config.scorer_id == "scorer-123" assert config.scorer_version_id == "version-123" + assert config.scorer_label == "toxicity" assert config.threshold == 0.7 assert config.operator == "gte" assert config.scorer_config == 
{"temperature": 0} @@ -52,40 +55,58 @@ def test_config_accepts_scorer_id_without_label(self) -> None: assert config.scorer_id == "scorer-123" assert config.scorer_label is None - def test_config_requires_a_scorer_identifier(self) -> None: + def test_config_requires_scorer_id(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig - with pytest.raises(ValidationError, match="one of scorer_label"): + with pytest.raises(ValidationError, match="scorer_id"): LunaEvaluatorConfig(threshold=0.5) + def test_config_rejects_label_only(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + with pytest.raises(ValidationError, match="scorer_id"): + LunaEvaluatorConfig(scorer_label="toxicity", threshold=0.5) + + def test_config_rejects_version_only(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + with pytest.raises(ValidationError, match="scorer_id"): + LunaEvaluatorConfig(scorer_version_id="version-123", threshold=0.5) + def test_numeric_operator_requires_numeric_threshold(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig - # Given/When/Then: numeric local comparison rejects non-numeric thresholds with pytest.raises(ValidationError, match="numeric threshold"): - LunaEvaluatorConfig(scorer_label="toxicity", threshold="high", operator="gte") + LunaEvaluatorConfig(scorer_id="scorer-123", threshold="high", operator="gte") class TestGalileoLunaClient: """Tests for the GalileoLunaClient HTTP contract.""" - def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: + def test_scorer_invoke_request_requires_scorer_id(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest + + with pytest.raises(ValidationError, match="scorer_id"): + ScorerInvokeRequest( + scorer_label="toxicity", + inputs=ScorerInvokeInputs(query="hello"), + ) + + def test_scorer_invoke_request_shape_with_all_fields(self) 
-> None: from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest - # Given: a scorer request with scorer config request = ScorerInvokeRequest( - scorer_label="toxicity", scorer_id="scorer-123", scorer_version_id="version-123", + scorer_label="toxicity", inputs=ScorerInvokeInputs(query={"messages": [{"role": "user", "content": "hello"}]}), config={"top_k": 1}, ) - # Then: the serialized payload uses the API-owned scorer invoke fields assert request.to_dict() == { - "scorer_label": "toxicity", "scorer_id": "scorer-123", "scorer_version_id": "version-123", + "scorer_label": "toxicity", "inputs": { "query": {"messages": [{"role": "user", "content": "hello"}]}, "response": "", @@ -93,23 +114,35 @@ def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: "config": {"top_k": 1}, } + def test_scorer_invoke_request_omits_optional_fields_when_absent(self) -> None: + from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest + + request = ScorerInvokeRequest( + scorer_id="scorer-123", + inputs=ScorerInvokeInputs(query="hello"), + ) + + body = request.to_dict() + assert body["scorer_id"] == "scorer-123" + assert "scorer_version_id" not in body + assert "scorer_label" not in body + assert body["config"] == {} + @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) def test_scorer_invoke_request_requires_input_or_output(self, empty_value: object) -> None: from agent_control_evaluator_galileo.luna import ScorerInvokeRequest - # Given/When/Then: the request mirrors API validation with pytest.raises( ValidationError, match="Either inputs.query or inputs.response must be set" ): ScorerInvokeRequest( - scorer_label="toxicity", + scorer_id="scorer-123", inputs={"query": empty_value, "response": empty_value}, ) - def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: + def test_scorer_invoke_response_shape(self) -> None: from agent_control_evaluator_galileo.luna import 
ScorerInvokeResponse - # Given: an API scorer invoke response response = ScorerInvokeResponse.from_dict( { "scorer_label": "toxicity", @@ -120,7 +153,6 @@ def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: } ) - # Then: the model exposes the API response fields assert response.model_dump() == { "scorer_label": "toxicity", "score": 0.82, @@ -128,47 +160,10 @@ def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: "execution_time": 0.12, "error_message": None, } - assert response.scorer_label == "toxicity" assert response.raw_response["scorer_label"] == "toxicity" - def test_client_uses_protect_api_url_derivation(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: the same console URL shape used by Protect - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}): - client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") - - # Then: the API URL is derived the same way - assert client.api_base == "https://api.demo-v2.galileocloud.io" - - def test_client_uses_galileo_api_url_when_set(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: an explicit devstack API URL - with patch.dict( - os.environ, - { - "GALILEO_API_KEY": "test-key", - "GALILEO_API_URL": "https://api-test-luna.gcp-dev.galileo.ai/", - }, - ): - client = GalileoLunaClient(console_url="https://console-test-luna.gcp-dev.galileo.ai") - - # Then: the explicit API URL wins over console URL derivation - assert client.api_base == "https://api-test-luna.gcp-dev.galileo.ai" - - def test_client_derives_api_url_from_console_dash_hostname(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient - - # Given: a console- devstack hostname - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=False): - client = GalileoLunaClient(console_url="https://console-test-luna.gcp-dev.galileo.ai") - - # Then: the matching api- 
hostname is used - assert client.api_base == "https://api-test-luna.gcp-dev.galileo.ai" - @pytest.mark.asyncio - async def test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: + async def test_client_posts_to_runners_api_scorer_invoke(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} @@ -187,21 +182,14 @@ def handler(request: httpx.Request) -> httpx.Response: }, ) - # Given: a Luna client with a mock HTTP transport - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}): - client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") - client._client = httpx.AsyncClient( - transport=httpx.MockTransport(handler), - headers={ - "Galileo-API-Key": client.api_key, - "Content-Type": "application/json", - }, - ) + # Given: a Luna client pointing at runners-api + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() + client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) try: - # When: invoking a scorer response = await client.invoke( - scorer_label="toxicity", + scorer_id="scorer-123", input="user prompt", output="model answer", config={"top_k": 1}, @@ -209,103 +197,74 @@ def handler(request: httpx.Request) -> httpx.Response: finally: await client.close() - # Then: the direct scorer endpoint and body are used + # Then: posts to runners-api /api/v1/scorers/invoke with JWT, no Galileo-API-Key assert response.score == 0.82 - assert captured["url"] == "https://api.demo-v2.galileocloud.io/scorers/invoke" + assert captured["url"] == "http://runners-api:8090/api/v1/scorers/invoke" assert captured["body"] == { - "scorer_label": "toxicity", + "scorer_id": "scorer-123", "inputs": {"query": "user prompt", "response": "model answer"}, "config": {"top_k": 1}, } - assert "stage_name" not in captured["body"] - assert "prioritized_rulesets" not in captured["body"] headers = captured["headers"] assert isinstance(headers, 
dict) - assert headers["galileo-api-key"] == "test-key" + assert "galileo-api-key" not in headers + auth_header = headers["authorization"] + assert isinstance(auth_header, str) + assert auth_header.startswith("Bearer ") + payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) + assert payload["internal"] is True + assert payload["scope"] == "scorers.invoke" @pytest.mark.asyncio - async def test_client_uses_internal_jwt_when_api_secret_is_set(self) -> None: + async def test_client_forwards_scorer_version_id_when_configured(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} def handler(request: httpx.Request) -> httpx.Response: - captured["url"] = str(request.url) - captured["headers"] = dict(request.headers) captured["body"] = json.loads(request.content.decode()) return httpx.Response( - 200, - json={ - "scorer_label": "toxicity", - "score": 0.82, - "status": "success", - "execution_time": 0.12, - }, + 200, json={"score": 0.5, "status": "success"} ) - # Given: a Luna client configured with the Galileo API internal secret - with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): - client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) try: - # When: invoking a scorer with internal JWT auth - response = await client.invoke(scorer_label="toxicity", output="model answer") + await client.invoke( + scorer_id="scorer-123", + scorer_version_id="version-456", + input="hello", + ) finally: await client.close() - # Then: the internal scorer endpoint is called with an internal JWT - assert response.score == 0.82 - assert ( - captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke" - ) - assert captured["body"] == { - "scorer_label": "toxicity", - "inputs": 
{"query": "", "response": "model answer"}, - } - headers = captured["headers"] - assert isinstance(headers, dict) - assert "galileo-api-key" not in headers - auth_header = headers["authorization"] - assert isinstance(auth_header, str) - assert auth_header.startswith("Bearer ") - token_payload = _decode_jwt_payload(auth_header.removeprefix("Bearer ")) - assert token_payload["internal"] is True - assert token_payload["scope"] == "scorers.invoke" + assert captured["body"]["scorer_version_id"] == "version-456" @pytest.mark.asyncio - async def test_client_uses_internal_jwt_without_api_key(self) -> None: + async def test_client_omits_galileo_api_key_even_when_env_is_set(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient - # Given: a Luna client configured with internal JWT auth - with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): - client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") - captured: dict[str, object] = {} def handler(request: httpx.Request) -> httpx.Response: captured["headers"] = dict(request.headers) - return httpx.Response( - 200, - json={"scorer_label": "toxicity", "score": 0.82, "status": "success"}, - ) + return httpx.Response(200, json={"score": 0.5, "status": "success"}) + env = {**RUNNERS_ENV, "GALILEO_API_KEY": "should-not-be-sent"} + with patch.dict(os.environ, env, clear=True): + client = GalileoLunaClient() client._client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + try: - # When: invoking without project context - response = await client.invoke(scorer_label="toxicity", output="model answer") + await client.invoke(scorer_id="scorer-123", input="hello") finally: await client.close() - # Then: internal JWT auth still works - assert response.score == 0.82 headers = captured["headers"] assert isinstance(headers, dict) - auth_header = headers["authorization"] - assert isinstance(auth_header, str) - token_payload = 
_decode_jwt_payload(auth_header.removeprefix("Bearer ")) - assert token_payload["internal"] is True - assert token_payload["scope"] == "scorers.invoke" + assert "galileo-api-key" not in headers @pytest.mark.asyncio @pytest.mark.parametrize("empty_value", ["", " ", {}, []]) @@ -314,19 +273,17 @@ async def test_client_rejects_missing_input_and_output_values( ) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient - # Given: a Luna client and scorer input values that API treats as missing - with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): - client = GalileoLunaClient(api_url="https://api.default.svc.cluster.local:8088") + with patch.dict(os.environ, RUNNERS_ENV, clear=True): + client = GalileoLunaClient() - # When/Then: the client rejects the request before calling API with pytest.raises(ValueError, match="At least one of input or output must be provided"): - await client.invoke(scorer_label="toxicity", input=empty_value, output=empty_value) + await client.invoke(scorer_id="scorer-123", input=empty_value, output=empty_value) class TestLunaEvaluator: """Tests for direct Luna evaluator behavior.""" - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) def test_evaluator_metadata(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator @@ -337,31 +294,26 @@ def test_evaluator_metadata(self) -> None: def test_evaluator_init_without_auth_raises(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator - with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_KEY"): - LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_SECRET"): + LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) - @patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True) + @patch.dict(os.environ, RUNNERS_ENV, 
clear=True) def test_evaluator_init_accepts_api_secret(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator - evaluator = LunaEvaluator.from_dict( - { - "scorer_label": "toxicity", - "threshold": 0.5, - } - ) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) - assert evaluator.config.scorer_label == "toxicity" + assert evaluator.config.scorer_id == "scorer-123" - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: a direct Luna evaluator and a raw successful scorer response evaluator = LunaEvaluator.from_dict( { + "scorer_id": "scorer-123", "scorer_label": "toxicity", "threshold": 0.7, "operator": "gte", @@ -377,7 +329,6 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: execution_time=0.1, ) - # When: evaluating a full step payload result = await evaluator.evaluate( { "input": "user prompt", @@ -385,11 +336,11 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: } ) - # Then: the raw score is thresholded locally and no Protect fields are sent assert isinstance(result, EvaluatorResult) assert result.matched is True assert result.confidence == 0.82 assert result.metadata == { + "scorer_id": "scorer-123", "scorer_label": "toxicity", "score": 0.82, "threshold": 0.7, @@ -399,6 +350,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: "error_message": None, } mock_invoke.assert_awaited_once_with( + scorer_id="scorer-123", scorer_label="toxicity", input="user prompt", output="model answer", @@ -406,15 +358,14 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: timeout=5.0, ) - 
@patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_returns_non_match_below_threshold(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: a raw scorer value below the local threshold evaluator = LunaEvaluator.from_dict( - {"scorer_label": "toxicity", "threshold": 0.7, "operator": "gte"} + {"scorer_id": "scorer-123", "threshold": 0.7, "operator": "gte"} ) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: @@ -424,56 +375,48 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: status="success", ) - # When: evaluating selected scalar data result = await evaluator.evaluate("hello") - # Then: the control does not match assert result.matched is False assert result.confidence == 0.2 mock_invoke.assert_awaited_once_with( - scorer_label="toxicity", + scorer_id="scorer-123", input="hello", output=None, config=None, timeout=10.0, ) - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio @pytest.mark.parametrize("data", ["", " "]) async def test_evaluator_does_not_call_api_for_empty_data(self, data: str) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: an evaluator and empty selected data - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: - # When: evaluating empty data result = await evaluator.evaluate(data) - # Then: no remote scorer call is made assert result.matched is False assert result.confidence == 1.0 assert 
result.message == "No data to score with Luna" mock_invoke.assert_not_called() - @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_fail_open_sets_error(self) -> None: from agent_control_evaluator_galileo.luna import LunaEvaluator from agent_control_evaluator_galileo.luna.client import GalileoLunaClient - # Given: fixed fail-open behavior for scorer errors - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + evaluator = LunaEvaluator.from_dict({"scorer_id": "scorer-123", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: mock_invoke.side_effect = RuntimeError("service unavailable") - # When: the scorer call fails result = await evaluator.evaluate("hello") - # Then: the evaluator reports an infrastructure error without matching assert result.matched is False assert result.error == "service unavailable" assert result.metadata is not None diff --git a/examples/README.md b/examples/README.md index ffe9b46a..daa27094 100644 --- a/examples/README.md +++ b/examples/README.md @@ -13,7 +13,7 @@ This directory contains runnable examples for Agent Control. Each example has it | Google ADK Decorator | Tool-only `@control()` pattern for explicit ADK tool protection. | https://docs.agentcontrol.dev/examples/google-adk-decorator | | Customer Support Agent | Enterprise scenario with PII protection, prompt-injection defense, and multiple tools. | https://docs.agentcontrol.dev/examples/customer-support | | DeepEval | Build a custom evaluator using DeepEval GEval metrics. | https://docs.agentcontrol.dev/examples/deepeval | -| Galileo Luna Direct | Direct `/scorers/invoke` Luna evaluation with a composite Agent Control condition. 
| `examples/galileo_luna/` | +| Galileo Luna Direct | Direct `/api/v1/scorers/invoke` Luna evaluation (`scorer_id` required; `scorer_label`/`scorer_version_id` optional) with a composite Agent Control condition. | `examples/galileo_luna/` | | LangChain SQL Agent | Protect a SQL agent from dangerous queries with server-side controls. | https://docs.agentcontrol.dev/examples/langchain-sql | | Steer Action Demo | Banking transfer agent showcasing observe, deny, and steer actions. | https://docs.agentcontrol.dev/examples/steer-action-demo | | Target Context | Bind controls to opaque external targets (e.g. `env=prod`) and let the SDK pin one target per session. | https://docs.agentcontrol.dev/examples/target-context | diff --git a/examples/galileo_luna/README.md b/examples/galileo_luna/README.md index 5ac97cda..33a90fd3 100644 --- a/examples/galileo_luna/README.md +++ b/examples/galileo_luna/README.md @@ -1,6 +1,6 @@ # Galileo Luna Direct Evaluator Example -This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls Galileo's `/scorers/invoke` API and applies thresholds locally from the control definition. +This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls runners-api at `/api/v1/scorers/invoke` and applies thresholds locally from the control definition. 
## What It Shows @@ -17,29 +17,24 @@ Start the Agent Control server from the repo root: make server-run ``` -Configure Galileo public API-key auth: +Configure runners-api credentials: ```bash -export GALILEO_LUNA_AUTH_MODE="public" -export GALILEO_API_KEY="your-api-key" -export GALILEO_CONSOLE_URL="https://console.demo-v2.galileocloud.io" +export GALILEO_API_SECRET_KEY="your-api-secret" +export GALILEO_RUNNERS_API_URL="http://runners-api:8090" ``` -For internal deployments, use internal auth instead: +Required scorer setting: ```bash -export GALILEO_LUNA_AUTH_MODE="internal" -export GALILEO_API_SECRET_KEY="your-api-secret" -export GALILEO_API_URL="https://api.default.svc.cluster.local:8088" +export GALILEO_LUNA_SCORER_ID="your-scorer-uuid" ``` Optional scorer settings: ```bash -export GALILEO_LUNA_SCORER_LABEL="toxicity" -# Or select by scorer id/version instead of label: -# export GALILEO_LUNA_SCORER_ID="scorer-id" -# export GALILEO_LUNA_SCORER_VERSION_ID="scorer-version-id" +export GALILEO_LUNA_SCORER_LABEL="toxicity" # display/metadata label only +export GALILEO_LUNA_SCORER_VERSION_ID="version-uuid" # pin a specific scorer version export GALILEO_LUNA_THRESHOLD="0.5" export GALILEO_LUNA_PAYLOAD_FIELD="output" ``` @@ -50,10 +45,6 @@ scalar as the scorer `output` field. If a selector returns structured data with `input` and/or `output` keys, those keys are sent directly and override `GALILEO_LUNA_PAYLOAD_FIELD`. -If both `GALILEO_API_KEY` and `GALILEO_API_SECRET_KEY`/`GALILEO_API_SECRET` are -set, `GALILEO_LUNA_AUTH_MODE` is required so the client does not silently choose -an auth path. - Run: ```bash diff --git a/examples/galileo_luna/demo_agent.py b/examples/galileo_luna/demo_agent.py index 8c7f59b2..af95ce70 100644 --- a/examples/galileo_luna/demo_agent.py +++ b/examples/galileo_luna/demo_agent.py @@ -4,7 +4,9 @@ Prerequisites: 1. Start server: make server-run 2. Create controls: uv run python setup_controls.py - 3. 
Set Galileo credentials where this script runs + 3. Set Galileo credentials where this script runs: + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET + GALILEO_RUNNERS_API_URL Usage: uv run python demo_agent.py @@ -21,7 +23,6 @@ AGENT_NAME = "galileo-luna-agent" SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") -LUNA_AUTH_MODE = os.getenv("GALILEO_LUNA_AUTH_MODE") logging.basicConfig( level=logging.INFO, @@ -91,37 +92,25 @@ def init_agent() -> None: async def run_demo() -> None: """Run scripted scenarios.""" - api_key = os.getenv("GALILEO_API_KEY") api_secret = os.getenv("GALILEO_API_SECRET_KEY") or os.getenv("GALILEO_API_SECRET") - if not api_key and not api_secret: - print( - "Galileo credentials are required for the galileo.luna evaluator. " - "Set GALILEO_API_KEY for public mode or GALILEO_API_SECRET_KEY for " - "internal mode." - ) - return - if api_key and api_secret and LUNA_AUTH_MODE not in {"public", "internal"}: + runners_url = os.getenv("GALILEO_RUNNERS_API_URL") + + if not api_secret: print( - "Both GALILEO_API_KEY and GALILEO_API_SECRET_KEY/GALILEO_API_SECRET are set. " - "Set GALILEO_LUNA_AUTH_MODE to 'public' or 'internal'." + "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required for the " + "galileo.luna evaluator." ) return - if LUNA_AUTH_MODE == "public" and not api_key: - print("GALILEO_API_KEY is required when GALILEO_LUNA_AUTH_MODE=public.") - return - if LUNA_AUTH_MODE == "internal" and not api_secret: - print( - "GALILEO_API_SECRET_KEY or GALILEO_API_SECRET is required when " - "GALILEO_LUNA_AUTH_MODE=internal." 
- ) + if not runners_url: + print("GALILEO_RUNNERS_API_URL is required for the galileo.luna evaluator.") return print("=" * 72) print("Direct Galileo Luna Evaluator Demo") print("=" * 72) - print(f"Server: {SERVER_URL}") - print(f"Agent: {AGENT_NAME}") - print(f"Auth: GALILEO_LUNA_AUTH_MODE={LUNA_AUTH_MODE or '(auto if one credential)'}") + print(f"Server: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print(f"Runners API: {runners_url}") print() init_agent() diff --git a/examples/galileo_luna/setup_controls.py b/examples/galileo_luna/setup_controls.py index fb4c6c76..b44c2dde 100644 --- a/examples/galileo_luna/setup_controls.py +++ b/examples/galileo_luna/setup_controls.py @@ -4,8 +4,9 @@ Prerequisites: - Agent Control server running at AGENT_CONTROL_URL, default http://localhost:8000 - Galileo credentials set where demo_agent.py will run: - GALILEO_API_KEY with GALILEO_LUNA_AUTH_MODE=public, or - GALILEO_API_SECRET_KEY/GALILEO_API_SECRET with GALILEO_LUNA_AUTH_MODE=internal + GALILEO_API_SECRET_KEY or GALILEO_API_SECRET + GALILEO_RUNNERS_API_URL + GALILEO_LUNA_SCORER_ID (required) Usage: uv run python setup_controls.py @@ -24,13 +25,14 @@ AGENT_DESCRIPTION = "Demo agent protected by direct Galileo Luna scorer controls" SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") -LUNA_SCORER_LABEL = os.getenv("GALILEO_LUNA_SCORER_LABEL", "toxicity") LUNA_SCORER_ID = os.getenv("GALILEO_LUNA_SCORER_ID") +LUNA_SCORER_LABEL = os.getenv("GALILEO_LUNA_SCORER_LABEL") LUNA_SCORER_VERSION_ID = os.getenv("GALILEO_LUNA_SCORER_VERSION_ID") LUNA_THRESHOLD = float(os.getenv("GALILEO_LUNA_THRESHOLD", "0.5")) LUNA_PAYLOAD_FIELD = os.getenv("GALILEO_LUNA_PAYLOAD_FIELD", "output") -LUNA_AUTH_MODE = os.getenv("GALILEO_LUNA_AUTH_MODE") +if not LUNA_SCORER_ID: + raise ValueError("GALILEO_LUNA_SCORER_ID is required.") if LUNA_PAYLOAD_FIELD not in {"input", "output"}: raise ValueError("GALILEO_LUNA_PAYLOAD_FIELD must be either 'input' or 'output'.") @@ -48,14 +50,13 @@ def 
luna_config() -> dict[str, Any]: """Build the direct Luna evaluator config used by the composite control.""" config: dict[str, Any] = { + "scorer_id": LUNA_SCORER_ID, "threshold": LUNA_THRESHOLD, "operator": "gte", "payload_field": LUNA_PAYLOAD_FIELD, } if LUNA_SCORER_LABEL: config["scorer_label"] = LUNA_SCORER_LABEL - if LUNA_SCORER_ID: - config["scorer_id"] = LUNA_SCORER_ID if LUNA_SCORER_VERSION_ID: config["scorer_version_id"] = LUNA_SCORER_VERSION_ID return config @@ -169,13 +170,12 @@ async def setup_demo() -> None: print(f"Agent: {AGENT_NAME}") print( "Luna: " - f"scorer_label={LUNA_SCORER_LABEL!r}, " f"scorer_id={LUNA_SCORER_ID!r}, " + f"scorer_label={LUNA_SCORER_LABEL!r}, " f"scorer_version_id={LUNA_SCORER_VERSION_ID!r}, " f"threshold={LUNA_THRESHOLD}, " f"payload_field={LUNA_PAYLOAD_FIELD!r}" ) - print(f"Auth: GALILEO_LUNA_AUTH_MODE={LUNA_AUTH_MODE or '(auto if one credential)'}") async with AgentControlClient(base_url=SERVER_URL, timeout=30.0) as client: await client.health_check() diff --git a/models/pyproject.toml b/models/pyproject.toml index 9ee15f94..9c955cfd 100644 --- a/models/pyproject.toml +++ b/models/pyproject.toml @@ -6,6 +6,7 @@ requires-python = ">=3.12" dependencies = [ "pydantic>=2.12.4", "jsonschema>=4.0.0", + "google-re2>=1.1", ] authors = [ {name = "Agent Control Team"} diff --git a/server/tests/test_init_agent.py b/server/tests/test_init_agent.py index 2dfe9eaa..9b7ba5ba 100644 --- a/server/tests/test_init_agent.py +++ b/server/tests/test_init_agent.py @@ -46,11 +46,24 @@ def make_agent_payload( } +def _collect_paths(routes: list, prefix: str = "") -> set[str]: + """Recursively collect route paths, handling FastAPI's _IncludedRouter wrapper.""" + paths: set[str] = set() + for route in routes: + path = getattr(route, "path", None) + if path: + paths.add(prefix + path) + nested = getattr(route, "original_router", None) + if nested: + ctx = getattr(route, "include_context", None) + pfx = getattr(ctx, "prefix", "") if ctx else "" + paths 
|= _collect_paths(nested.routes, prefix + pfx) + return paths + + def test_init_agent_route_exists(app: FastAPI) -> None: - # Given: an application router - paths = {getattr(route, "path", None) for route in app.router.routes} - # When: inspecting registered paths - # (computation done above to gather all paths) + # Given: an application router (FastAPI 0.130+ uses _IncludedRouter wrappers) + paths = _collect_paths(app.routes) # Then: initAgent and agent retrieval endpoints are present assert "/api/v1/agents/initAgent" in paths assert "/api/v1/agents/{agent_name}" in paths From f93f39e8044c568a8830e402dbff6d92be0e3c5d Mon Sep 17 00:00:00 2001 From: Namrata Ghadi Date: Tue, 16 Jun 2026 23:40:18 -0700 Subject: [PATCH 4/6] do not require version_id --- .../luna/client.py | 15 +++++++++ .../luna/evaluator.py | 2 -- .../galileo/tests/test_luna_coverage_gaps.py | 31 +++++++++++++++++ .../galileo/tests/test_luna_evaluator.py | 33 +++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index 6a2942fe..3751f771 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -20,6 +20,8 @@ DEFAULT_INTERNAL_TOKEN_TTL_SECS = 3600 RUNNERS_SCORER_INVOKE_PATH = "/api/v1/scorers/invoke" RUNNERS_API_URL_ENV = "GALILEO_RUNNERS_API_URL" +RUNNERS_API_CA_FILE_ENV = "GALILEO_RUNNERS_API_CA_FILE" +AUTH_UPSTREAM_CA_FILE_ENV = "AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE" # Headers that must never be forwarded to runners-api (checked case-insensitively). 
_BLOCKED_REQUEST_HEADERS = frozenset({"galileo-api-key"}) @@ -156,6 +158,7 @@ def __init__( self, api_secret: str | None = None, runners_api_url: str | None = None, + runners_api_ca_file: str | None = None, ) -> None: """Initialize the Galileo Luna client. @@ -164,6 +167,10 @@ def __init__( GALILEO_API_SECRET_KEY or GALILEO_API_SECRET. runners_api_url: runners-api base URL. If not provided, reads from GALILEO_RUNNERS_API_URL. + runners_api_ca_file: Optional CA bundle used to verify runners-api + TLS. If not provided, reads from GALILEO_RUNNERS_API_CA_FILE, + then AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE for Galileo in-cluster + deployments that already mount the internal CA. Raises: ValueError: If the API secret or runners-api URL is not configured. @@ -187,14 +194,22 @@ def __init__( self.api_secret = resolved_api_secret self.runners_api_base = resolved_runners_url.rstrip("/") + self.runners_api_ca_file = ( + runners_api_ca_file + or os.getenv(RUNNERS_API_CA_FILE_ENV) + or os.getenv(AUTH_UPSTREAM_CA_FILE_ENV) + or None + ) self._client: httpx.AsyncClient | None = None async def _get_client(self) -> httpx.AsyncClient: """Get or create the HTTP client.""" if self._client is None or self._client.is_closed: + verify: str | bool = self.runners_api_ca_file or True self._client = httpx.AsyncClient( headers={"Content-Type": "application/json"}, timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECS), + verify=verify, ) return self._client diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py index f204b21d..41e1a22c 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -221,8 +221,6 @@ def _base_metadata(self) -> dict[str, Any]: def _scorer_kwargs(self) -> dict[str, Any]: kwargs: dict[str, Any] = {"scorer_id": self.config.scorer_id} - if 
self.config.scorer_version_id is not None: - kwargs["scorer_version_id"] = self.config.scorer_version_id if self.config.scorer_label is not None: kwargs["scorer_label"] = self.config.scorer_label return kwargs diff --git a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py b/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py index cfbccdc4..3a2eb25c 100644 --- a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py +++ b/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py @@ -500,6 +500,37 @@ async def test_get_client_does_not_set_galileo_api_key_header(monkeypatch): await client.close() +@pytest.mark.asyncio +async def test_get_client_uses_configured_runners_api_ca_file(monkeypatch): + """The HTTP client should verify internal runners-api TLS with the configured CA.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + monkeypatch.setenv("GALILEO_RUNNERS_API_CA_FILE", "/etc/galileo/runners-api-ca.crt") + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + with patch("httpx.AsyncClient") as async_client: + client = GalileoLunaClient() + await client._get_client() + + assert async_client.call_args.kwargs["verify"] == "/etc/galileo/runners-api-ca.crt" + + +@pytest.mark.asyncio +async def test_get_client_falls_back_to_agent_control_auth_upstream_ca_file(monkeypatch): + """Galileo in-cluster Agent Control pods already mount the internal CA for auth upstream.""" + for key, value in RUNNERS_ENV.items(): + monkeypatch.setenv(key, value) + monkeypatch.delenv("GALILEO_RUNNERS_API_CA_FILE", raising=False) + monkeypatch.setenv("AGENT_CONTROL_AUTH_UPSTREAM_CA_FILE", "/etc/agent-control/auth-upstream-ca/ca.crt") + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + with patch("httpx.AsyncClient") as async_client: + client = GalileoLunaClient() + await client._get_client() + + assert async_client.call_args.kwargs["verify"] == "/etc/agent-control/auth-upstream-ca/ca.crt" + 
+ @pytest.mark.asyncio async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): """A non-object JSON body must surface as a clear RuntimeError.""" diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 78d21e68..349fcc82 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -358,6 +358,39 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: timeout=5.0, ) + @patch.dict(os.environ, RUNNERS_ENV) + @pytest.mark.asyncio + async def test_evaluator_does_not_forward_configured_scorer_version_id(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + evaluator = LunaEvaluator.from_dict( + { + "scorer_id": "scorer-123", + "scorer_version_id": "version-456", + "threshold": 0.5, + "operator": "gte", + } + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.return_value = ScorerInvokeResponse( + score=0.82, + status="success", + ) + + result = await evaluator.evaluate("hello") + + assert result.matched is True + assert result.metadata["scorer_version_id"] == "version-456" + mock_invoke.assert_awaited_once_with( + scorer_id="scorer-123", + input="hello", + output=None, + config=None, + timeout=10.0, + ) + @patch.dict(os.environ, RUNNERS_ENV) @pytest.mark.asyncio async def test_evaluator_returns_non_match_below_threshold(self) -> None: From 55d69ac65fd11b6d675ea3105c2a68cfec68320f Mon Sep 17 00:00:00 2001 From: Namrata Ghadi Date: Thu, 18 Jun 2026 14:04:20 -0700 Subject: [PATCH 5/6] sqlglot error --- evaluators/builtin/pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/evaluators/builtin/pyproject.toml b/evaluators/builtin/pyproject.toml index 46199d83..82b4bdcb 
100644 --- a/evaluators/builtin/pyproject.toml +++ b/evaluators/builtin/pyproject.toml @@ -11,8 +11,7 @@ dependencies = [ "pydantic>=2.12.4", "google-re2>=1.1", "jsonschema>=4.0.0", - "sqlglot[c]>=29.0.0,<29.1.0", - "sqlglotc>=29.0.0,<29.1.0", + "sqlglot>=29.0.0,<29.1.0", ] [project.optional-dependencies] From adb96fe5fbc7913880faaf9062760d24cc76006a Mon Sep 17 00:00:00 2001 From: Namrata Ghadi Date: Thu, 18 Jun 2026 14:10:42 -0700 Subject: [PATCH 6/6] remove the sqlglot changes --- evaluators/builtin/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evaluators/builtin/pyproject.toml b/evaluators/builtin/pyproject.toml index 82b4bdcb..46199d83 100644 --- a/evaluators/builtin/pyproject.toml +++ b/evaluators/builtin/pyproject.toml @@ -11,7 +11,8 @@ dependencies = [ "pydantic>=2.12.4", "google-re2>=1.1", "jsonschema>=4.0.0", - "sqlglot>=29.0.0,<29.1.0", + "sqlglot[c]>=29.0.0,<29.1.0", + "sqlglotc>=29.0.0,<29.1.0", ] [project.optional-dependencies]