From 848e3cefea9af4d108c9349387625def71a5433c Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Mon, 15 Jun 2026 16:36:34 -0700 Subject: [PATCH] feat(gateway): spike runtime settings file Signed-off-by: Taylor Mutch --- architecture/gateway.md | 10 + crates/openshell-server/src/config_file.rs | 10 + crates/openshell-server/src/grpc/mod.rs | 10 +- crates/openshell-server/src/grpc/policy.rs | 57 +- crates/openshell-server/src/lib.rs | 29 + crates/openshell-server/src/runtime_config.rs | 584 ++++++++++++++++++ deploy/helm/openshell/README.md | 4 + .../openshell/templates/_gateway-workload.tpl | 10 + deploy/helm/openshell/templates/_helpers.tpl | 35 ++ .../openshell/templates/gateway-config.yaml | 3 + .../openshell/templates/runtime-config.yaml | 16 + .../openshell/tests/runtime_config_test.yaml | 102 +++ deploy/helm/openshell/values.yaml | 11 + docs/reference/gateway-config.mdx | 27 +- 14 files changed, 897 insertions(+), 11 deletions(-) create mode 100644 crates/openshell-server/src/runtime_config.rs create mode 100644 deploy/helm/openshell/templates/runtime-config.yaml create mode 100644 deploy/helm/openshell/tests/runtime_config_test.yaml diff --git a/architecture/gateway.md b/architecture/gateway.md index 7afec0767..81c998068 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -253,6 +253,10 @@ Policy and runtime settings are delivered together through the effective sandbox config path. A gateway-global policy can override sandbox-scoped policy. The sandbox supervisor polls for config revisions and hot-reloads dynamic policy when the policy engine accepts the update. +When configured, a gateway runtime settings file reconciles selected +gateway-global settings into the same settings row. Keys present in that file +are file-managed; omitted keys stay available through the normal global +settings API. Provider credential expiry is enforced during gateway-to-sandbox credential resolution and again by the sandbox placeholder resolver. 
This keeps expired @@ -422,6 +426,12 @@ Driver implementation settings live in the TOML driver tables. See `docs/reference/gateway-config.mdx` for worked per-driver examples and RFC 0003 for the full schema. +Startup configuration can reference a separate runtime settings file with +`runtime_config_path`. That file is watched after startup and is intentionally +limited to registered runtime settings such as provider/profile and logging +feature flags. It does not configure drivers, listeners, providers, +credentials, or global policy payloads. + `database_url` is env-only and rejected when present in the file (`OPENSHELL_DB_URL` / `--db-url`). diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index 39cf02bba..7b40169c5 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -84,6 +84,11 @@ pub struct GatewayFileSection { // ── Logging ────────────────────────────────────────────────────────── #[serde(default)] pub log_level: Option, + /// Path to an optional gateway runtime settings file. When set, the + /// gateway loads registered runtime settings from this file at startup and + /// watches it for changes. 
+ #[serde(default)] + pub runtime_config_path: Option, // ── Drivers ────────────────────────────────────────────────────────── #[serde(default)] @@ -351,6 +356,7 @@ version = 1 bind_address = "0.0.0.0:8080" health_bind_address = "0.0.0.0:8081" log_level = "info" +runtime_config_path = "/etc/openshell/runtime.toml" compute_drivers = ["kubernetes"] sandbox_namespace = "agents" grpc_rate_limit_requests = 120 @@ -377,6 +383,10 @@ grpc_endpoint = "https://openshell-gateway.agents.svc:8080" let file = load(tmp.path()).expect("valid file parses"); let gw = &file.openshell.gateway; assert_eq!(gw.log_level.as_deref(), Some("info")); + assert_eq!( + gw.runtime_config_path.as_deref(), + Some(Path::new("/etc/openshell/runtime.toml")) + ); assert_eq!( gw.default_image.as_deref(), Some("ghcr.io/nvidia/openshell/sandbox:latest") diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 5947bb334..2ef08d3df 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -130,18 +130,18 @@ const MAX_PROVIDER_CONFIG_ENTRIES: usize = 64; // --------------------------------------------------------------------------- #[derive(Debug, Clone, Default, Serialize, Deserialize)] -struct StoredSettings { - revision: u64, - settings: BTreeMap, +pub struct StoredSettings { + pub revision: u64, + pub settings: BTreeMap, /// Database `resource_version` for CAS. Not persisted in the JSON payload; /// loaded from `ObjectRecord` and used for optimistic concurrency control. 
#[serde(skip)] - resource_version: u64, + pub resource_version: u64, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(tag = "type", content = "value")] -enum StoredSettingValue { +pub enum StoredSettingValue { String(String), Bool(bool), Int(i64), diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs index 2e2210f44..a27779862 100644 --- a/crates/openshell-server/src/grpc/policy.rs +++ b/crates/openshell-server/src/grpc/policy.rs @@ -1605,6 +1605,11 @@ async fn handle_update_config_inner( if key != POLICY_SETTING_KEY { validate_registered_setting_key(key)?; } + if state.runtime_settings.is_managed_key(key) { + return Err(Status::failed_precondition(format!( + "setting '{key}' is managed by the gateway runtime config file; update that file instead" + ))); + } let mut global_settings = load_global_settings(state.store.as_ref()).await?; let changed = if req.delete_setting { @@ -3698,14 +3703,11 @@ fn upsert_setting_value( } } -pub(super) async fn load_global_settings(store: &Store) -> Result { +pub async fn load_global_settings(store: &Store) -> Result { load_settings_record(store, GLOBAL_SETTINGS_OBJECT_TYPE, GLOBAL_SETTINGS_NAME).await } -pub(super) async fn save_global_settings( - store: &Store, - settings: &StoredSettings, -) -> Result<(), Status> { +pub async fn save_global_settings(store: &Store, settings: &StoredSettings) -> Result<(), Status> { save_settings_record( store, GLOBAL_SETTINGS_OBJECT_TYPE, @@ -8825,6 +8827,51 @@ mod tests { ); } + #[tokio::test] + async fn update_config_global_rejects_set_for_runtime_managed_key() { + let state = test_server_state().await; + state + .runtime_settings + .set_managed_keys([settings::PROVIDERS_V2_ENABLED_KEY.to_string()]); + + let req = with_user(Request::new(UpdateConfigRequest { + global: true, + setting_key: settings::PROVIDERS_V2_ENABLED_KEY.to_string(), + setting_value: Some(SettingValue { + value: Some(setting_value::Value::BoolValue(false)), 
+ }), + ..Default::default() + })); + let err = handle_update_config(&state, req) + .await + .expect_err("runtime-managed setting must reject global set"); + assert_eq!(err.code(), Code::FailedPrecondition); + assert!( + err.message().contains("runtime config file"), + "expected runtime config file guidance; got: {}", + err.message() + ); + } + + #[tokio::test] + async fn update_config_global_rejects_delete_for_runtime_managed_key() { + let state = test_server_state().await; + state + .runtime_settings + .set_managed_keys([settings::PROVIDERS_V2_ENABLED_KEY.to_string()]); + + let req = with_user(Request::new(UpdateConfigRequest { + global: true, + setting_key: settings::PROVIDERS_V2_ENABLED_KEY.to_string(), + delete_setting: true, + ..Default::default() + })); + let err = handle_update_config(&state, req) + .await + .expect_err("runtime-managed setting must reject global delete"); + assert_eq!(err.code(), Code::FailedPrecondition); + } + #[cfg(feature = "dev-settings")] #[test] fn merge_effective_settings_global_overrides_sandbox_key() { diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 9f1127d0e..7c3066e69 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -33,6 +33,7 @@ mod persistence; pub(crate) mod policy_store; mod provider_refresh; mod readiness; +mod runtime_config; mod sandbox_index; mod sandbox_watch; mod service_routing; @@ -107,6 +108,9 @@ pub struct ServerState { /// mutations that reads global state. pub settings_mutex: tokio::sync::Mutex<()>, + /// Runtime settings managed by the optional gateway runtime config file. + pub(crate) runtime_settings: runtime_config::RuntimeSettingsState, + /// Registry of active supervisor sessions and pending relay channels. /// /// Stored as `Arc` so compute drivers (e.g. 
the Docker driver) @@ -179,6 +183,7 @@ impl ServerState { ssh_connections_by_token: Mutex::new(HashMap::new()), ssh_connections_by_sandbox: Mutex::new(HashMap::new()), settings_mutex: tokio::sync::Mutex::new(()), + runtime_settings: runtime_config::RuntimeSettingsState::default(), supervisor_sessions, oidc_cache, sandbox_jwt_issuer: None, @@ -254,6 +259,27 @@ pub async fn run_server( oidc_cache, ); + let runtime_config_path = config_file + .as_ref() + .and_then(|file| file.openshell.gateway.runtime_config_path.clone()); + if let Some(path) = runtime_config_path.as_ref() { + let outcome = runtime_config::apply_file( + path, + store.as_ref(), + &state.settings_mutex, + &state.runtime_settings, + ) + .await + .map_err(|e| Error::config(e.to_string()))?; + info!( + path = %path.display(), + changed = outcome.changed, + settings_revision = outcome.revision, + managed_key_count = outcome.managed_key_count, + "runtime config file applied" + ); + } + // Load the gateway-minted sandbox JWT signing key when configured. // Optional so single-driver dev deployments without certgen continue // to start. The helm-deployed gateway and the RPM init script populate @@ -353,6 +379,9 @@ pub async fn run_server( } state.compute.spawn_watchers(shutdown_rx.clone()); + if let Some(path) = runtime_config_path { + runtime_config::spawn_watcher(state.clone(), path, shutdown_rx.clone()); + } ssh_sessions::spawn_session_reaper(store.clone(), Duration::from_secs(3600)); supervisor_session::spawn_relay_reaper(state.clone(), Duration::from_secs(30)); provider_refresh::spawn_refresh_worker(state.clone(), Duration::from_secs(60)); diff --git a/crates/openshell-server/src/runtime_config.rs b/crates/openshell-server/src/runtime_config.rs new file mode 100644 index 000000000..44bd3b328 --- /dev/null +++ b/crates/openshell-server/src/runtime_config.rs @@ -0,0 +1,584 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +//! Gateway runtime settings file support. +//! +//! Runtime settings are persisted through the existing gateway-global settings +//! record so the normal `GetSandboxConfig` revision path carries changes to +//! running sandboxes. + +use crate::Store; +use crate::grpc::policy::{load_global_settings, save_global_settings}; +use crate::grpc::{StoredSettingValue, StoredSettings}; +use openshell_core::settings::{self, SettingValueKind}; +use serde::Deserialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; +use std::time::{Duration, SystemTime}; +use tokio::sync::{Mutex, watch}; +use tonic::Code; +use tracing::{debug, info, warn}; + +const RUNTIME_CONFIG_SCHEMA_VERSION: u32 = 1; +const WATCH_INTERVAL: Duration = Duration::from_secs(2); +const APPLY_RETRY_LIMIT: usize = 5; + +/// Tracks runtime settings currently owned by the runtime config file. +#[derive(Debug, Clone, Default)] +pub struct RuntimeSettingsState { + managed_keys: Arc>>, +} + +impl RuntimeSettingsState { + pub fn is_managed_key(&self, key: &str) -> bool { + self.managed_keys + .read() + .expect("runtime settings lock poisoned") + .contains(key) + } + + pub fn set_managed_keys(&self, keys: I) + where + I: IntoIterator, + { + *self + .managed_keys + .write() + .expect("runtime settings lock poisoned") = keys.into_iter().collect(); + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuntimeSettingsDocument { + settings: BTreeMap, +} + +impl RuntimeSettingsDocument { + fn managed_keys(&self) -> impl Iterator + '_ { + self.settings.keys().cloned() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum RuntimeConfigError { + #[error("failed to read runtime config file '{}': {source}", path.display())] + Io { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("failed to parse runtime config file '{}': {source}", path.display())] + Parse { + path: PathBuf, + 
#[source] + source: toml::de::Error, + }, + #[error( + "unsupported runtime config version {version}; this build only supports version {RUNTIME_CONFIG_SCHEMA_VERSION}" + )] + UnsupportedVersion { version: u32 }, + #[error("runtime config setting 'policy' is reserved; use global policy APIs instead")] + ReservedPolicySetting, + #[error("unknown runtime config setting '{key}'. Allowed keys: {allowed}")] + UnknownSetting { key: String, allowed: String }, + #[error("runtime config setting '{key}' expects {expected} value; got {actual}")] + TypeMismatch { + key: String, + expected: &'static str, + actual: &'static str, + }, + #[error("runtime config setting '{key}' expects one of [{allowed}]; got '{value}'")] + InvalidStringValue { + key: String, + allowed: String, + value: String, + }, + #[error("failed to persist runtime settings from '{}': {message}", path.display())] + Persist { path: PathBuf, message: String }, +} + +#[derive(Debug, Default, Deserialize)] +#[serde(deny_unknown_fields)] +struct RawRuntimeConfig { + #[serde(default)] + openshell: RawOpenShellRuntimeRoot, +} + +#[derive(Debug, Default, Deserialize)] +#[serde(deny_unknown_fields)] +struct RawOpenShellRuntimeRoot { + #[serde(default)] + version: Option, + #[serde(default)] + runtime: RawRuntimeSection, +} + +#[derive(Debug, Default, Deserialize)] +#[serde(deny_unknown_fields)] +struct RawRuntimeSection { + #[serde(default)] + settings: BTreeMap, +} + +pub fn load(path: &Path) -> Result { + let contents = fs::read_to_string(path).map_err(|source| RuntimeConfigError::Io { + path: path.to_path_buf(), + source, + })?; + parse(path, &contents) +} + +fn parse(path: &Path, contents: &str) -> Result { + if contents.trim().is_empty() { + return Ok(RuntimeSettingsDocument { + settings: BTreeMap::new(), + }); + } + + let raw: RawRuntimeConfig = + toml::from_str(contents).map_err(|source| RuntimeConfigError::Parse { + path: path.to_path_buf(), + source, + })?; + + if let Some(version) = raw.openshell.version + && 
version > RUNTIME_CONFIG_SCHEMA_VERSION + { + return Err(RuntimeConfigError::UnsupportedVersion { version }); + } + + let mut parsed = BTreeMap::new(); + for (key, value) in raw.openshell.runtime.settings { + let stored = parse_setting_value(&key, &value)?; + parsed.insert(key, stored); + } + + Ok(RuntimeSettingsDocument { settings: parsed }) +} + +fn parse_setting_value( + key: &str, + value: &toml::Value, +) -> Result { + if key == "policy" { + return Err(RuntimeConfigError::ReservedPolicySetting); + } + + let setting = + settings::setting_for_key(key).ok_or_else(|| RuntimeConfigError::UnknownSetting { + key: key.to_string(), + allowed: settings::registered_keys_csv(), + })?; + + match (setting.kind, value) { + (SettingValueKind::Bool, toml::Value::Boolean(value)) => { + Ok(StoredSettingValue::Bool(*value)) + } + (SettingValueKind::Int, toml::Value::Integer(value)) => Ok(StoredSettingValue::Int(*value)), + (SettingValueKind::String, toml::Value::String(value)) => { + if let Err(allowed) = setting.validate_string_value(value) { + return Err(RuntimeConfigError::InvalidStringValue { + key: key.to_string(), + allowed: allowed.join(", "), + value: value.clone(), + }); + } + Ok(StoredSettingValue::String(value.clone())) + } + (kind, value) => Err(RuntimeConfigError::TypeMismatch { + key: key.to_string(), + expected: kind.as_str(), + actual: toml_value_kind(value), + }), + } +} + +fn toml_value_kind(value: &toml::Value) -> &'static str { + match value { + toml::Value::String(_) => "string", + toml::Value::Integer(_) => "int", + toml::Value::Float(_) => "float", + toml::Value::Boolean(_) => "bool", + toml::Value::Datetime(_) => "datetime", + toml::Value::Array(_) => "array", + toml::Value::Table(_) => "table", + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct RuntimeSettingsApplyOutcome { + pub changed: bool, + pub revision: u64, + pub managed_key_count: usize, +} + +pub async fn apply_file( + path: &Path, + store: &Store, + settings_mutex: &Mutex<()>, + 
state: &RuntimeSettingsState, +) -> Result { + let document = load(path)?; + apply_document(path, document, store, settings_mutex, state).await +} + +pub async fn apply_document( + path: &Path, + document: RuntimeSettingsDocument, + store: &Store, + settings_mutex: &Mutex<()>, + state: &RuntimeSettingsState, +) -> Result { + let _guard = settings_mutex.lock().await; + + for attempt in 1..=APPLY_RETRY_LIMIT { + let mut global = + load_global_settings(store) + .await + .map_err(|status| RuntimeConfigError::Persist { + path: path.to_path_buf(), + message: status.message().to_string(), + })?; + + let changed = upsert_runtime_settings(&mut global, &document); + if changed { + global.revision = global.revision.wrapping_add(1); + match save_global_settings(store, &global).await { + Ok(()) => {} + Err(status) if status.code() == Code::Aborted && attempt < APPLY_RETRY_LIMIT => { + debug!( + path = %path.display(), + attempt, + "runtime config settings write conflicted; retrying" + ); + continue; + } + Err(status) => { + return Err(RuntimeConfigError::Persist { + path: path.to_path_buf(), + message: status.message().to_string(), + }); + } + } + } + + let managed_key_count = document.settings.len(); + state.set_managed_keys(document.managed_keys()); + + return Ok(RuntimeSettingsApplyOutcome { + changed, + revision: global.revision, + managed_key_count, + }); + } + + Err(RuntimeConfigError::Persist { + path: path.to_path_buf(), + message: "settings were modified concurrently; retry limit exceeded".to_string(), + }) +} + +fn upsert_runtime_settings( + global: &mut StoredSettings, + document: &RuntimeSettingsDocument, +) -> bool { + let mut changed = false; + for (key, value) in &document.settings { + if global.settings.get(key) != Some(value) { + global.settings.insert(key.clone(), value.clone()); + changed = true; + } + } + changed +} + +pub fn spawn_watcher( + state: Arc, + path: PathBuf, + mut shutdown_rx: watch::Receiver, +) { + tokio::spawn(async move { + let mut snapshot 
= RuntimeFileSnapshot::capture(&path); + let mut interval = tokio::time::interval(WATCH_INTERVAL); + + loop { + tokio::select! { + changed = shutdown_rx.changed() => { + if changed.is_err() || *shutdown_rx.borrow() { + debug!( + path = %path.display(), + "runtime config watcher shutting down" + ); + break; + } + } + _ = interval.tick() => { + let next = RuntimeFileSnapshot::capture(&path); + if next == snapshot { + continue; + } + snapshot = next; + match apply_file( + &path, + state.store.as_ref(), + &state.settings_mutex, + &state.runtime_settings, + ).await { + Ok(outcome) => info!( + path = %path.display(), + changed = outcome.changed, + settings_revision = outcome.revision, + managed_key_count = outcome.managed_key_count, + "runtime config file reloaded" + ), + Err(err) => warn!( + path = %path.display(), + error = %err, + "runtime config reload failed; keeping last valid settings" + ), + } + } + } + } + }); +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct RuntimeFileSnapshot { + symlink: Option, + target: Option, + parent: Option, +} + +impl RuntimeFileSnapshot { + fn capture(path: &Path) -> Self { + let parent = path.parent().and_then(|parent| { + fs::metadata(parent) + .ok() + .map(FileMetadataSnapshot::from_metadata) + }); + let symlink = fs::symlink_metadata(path) + .ok() + .map(FileMetadataSnapshot::from_metadata); + let target = fs::metadata(path) + .ok() + .map(FileMetadataSnapshot::from_metadata); + Self { + symlink, + target, + parent, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct FileMetadataSnapshot { + len: u64, + modified: Option, + is_dir: bool, + is_file: bool, + is_symlink: bool, +} + +impl FileMetadataSnapshot { + fn from_metadata(metadata: fs::Metadata) -> Self { + let file_type = metadata.file_type(); + Self { + len: metadata.len(), + modified: metadata.modified().ok(), + is_dir: file_type.is_dir(), + is_file: file_type.is_file(), + is_symlink: file_type.is_symlink(), + } + } +} + +#[cfg(test)] +mod tests { + use 
super::*; + use crate::persistence::test_store; + use std::io::Write; + + fn parse_test(contents: &str) -> Result { + parse(Path::new("/runtime.toml"), contents) + } + + #[test] + fn parses_registered_runtime_settings() { + let doc = parse_test( + r#" +[openshell.runtime.settings] +providers_v2_enabled = true +proposal_approval_mode = "auto" +"#, + ) + .expect("runtime config parses"); + + assert_eq!( + doc.settings.get(settings::PROVIDERS_V2_ENABLED_KEY), + Some(&StoredSettingValue::Bool(true)) + ); + assert_eq!( + doc.settings.get(settings::PROPOSAL_APPROVAL_MODE_KEY), + Some(&StoredSettingValue::String("auto".to_string())) + ); + } + + #[test] + fn rejects_unknown_setting() { + let err = parse_test( + r" +[openshell.runtime.settings] +unknown_key = true +", + ) + .expect_err("unknown setting must be rejected"); + assert!(matches!(err, RuntimeConfigError::UnknownSetting { .. })); + } + + #[test] + fn rejects_reserved_policy_setting() { + let err = parse_test( + r#" +[openshell.runtime.settings] +policy = "deadbeef" +"#, + ) + .expect_err("policy must be rejected"); + assert!(matches!(err, RuntimeConfigError::ReservedPolicySetting)); + } + + #[test] + fn rejects_type_mismatch() { + let err = parse_test( + r#" +[openshell.runtime.settings] +providers_v2_enabled = "true" +"#, + ) + .expect_err("bool key must reject string value"); + assert!(matches!( + err, + RuntimeConfigError::TypeMismatch { + expected: "bool", + actual: "string", + .. + } + )); + } + + #[test] + fn rejects_invalid_string_value() { + let err = parse_test( + r#" +[openshell.runtime.settings] +proposal_approval_mode = "autom" +"#, + ) + .expect_err("invalid enum value must be rejected"); + assert!(matches!(err, RuntimeConfigError::InvalidStringValue { .. 
})); + } + + #[tokio::test] + async fn apply_file_preserves_unmanaged_global_settings() { + let mut tmp = tempfile::Builder::new() + .suffix(".toml") + .tempfile() + .expect("tempfile"); + tmp.write_all( + br" +[openshell.runtime.settings] +providers_v2_enabled = true +", + ) + .expect("write runtime config"); + + let store = test_store().await; + let settings_mutex = Mutex::new(()); + let runtime_state = RuntimeSettingsState::default(); + + let mut existing = StoredSettings::default(); + existing.settings.insert( + "unmanaged_future_setting".to_string(), + StoredSettingValue::String("keep".to_string()), + ); + existing.revision = 7; + save_global_settings(&store, &existing).await.unwrap(); + + let outcome = apply_file(tmp.path(), &store, &settings_mutex, &runtime_state) + .await + .expect("apply runtime config"); + assert!(outcome.changed); + assert_eq!(outcome.revision, 8); + assert!(runtime_state.is_managed_key(settings::PROVIDERS_V2_ENABLED_KEY)); + + let loaded = load_global_settings(&store).await.unwrap(); + assert_eq!( + loaded.settings.get("unmanaged_future_setting"), + Some(&StoredSettingValue::String("keep".to_string())) + ); + assert_eq!( + loaded.settings.get(settings::PROVIDERS_V2_ENABLED_KEY), + Some(&StoredSettingValue::Bool(true)) + ); + } + + #[tokio::test] + async fn apply_file_updates_managed_keys_when_file_removes_a_key() { + let store = test_store().await; + let settings_mutex = Mutex::new(()); + let runtime_state = RuntimeSettingsState::default(); + + let path = Path::new("/runtime.toml"); + let first = parse_test( + r" +[openshell.runtime.settings] +providers_v2_enabled = true +ocsf_json_enabled = true +", + ) + .unwrap(); + apply_document(path, first, &store, &settings_mutex, &runtime_state) + .await + .unwrap(); + assert!(runtime_state.is_managed_key("ocsf_json_enabled")); + + let second = parse_test( + r" +[openshell.runtime.settings] +providers_v2_enabled = true +", + ) + .unwrap(); + let outcome = apply_document(path, second, &store, 
&settings_mutex, &runtime_state) + .await + .unwrap(); + assert!(!outcome.changed); + assert!(!runtime_state.is_managed_key("ocsf_json_enabled")); + + let loaded = load_global_settings(&store).await.unwrap(); + assert_eq!( + loaded.settings.get("ocsf_json_enabled"), + Some(&StoredSettingValue::Bool(true)), + "removing a key from the file must not delete the last persisted global value" + ); + } + + #[test] + fn file_snapshot_changes_after_rewrite() { + let mut tmp = tempfile::Builder::new() + .suffix(".toml") + .tempfile() + .expect("tempfile"); + tmp.write_all(b"one").expect("write first"); + let first = RuntimeFileSnapshot::capture(tmp.path()); + std::thread::sleep(Duration::from_millis(10)); + tmp.as_file_mut().set_len(0).expect("truncate"); + tmp.write_all(b"two-two").expect("write second"); + tmp.as_file_mut().sync_all().expect("sync"); + let second = RuntimeFileSnapshot::capture(tmp.path()); + assert_ne!(first, second); + } +} diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index e6d539592..0b25bdbd8 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -213,6 +213,10 @@ add `ci/values-spire.yaml` to the OpenShell release values files. | server.oidc.userRole | string | `""` | Role name for standard user access. | | server.providerTokenGrants.spiffe.enabled | bool | `false` | Mount the SPIFFE Workload API socket into sandbox pods for dynamic provider token grants. | | server.providerTokenGrants.spiffe.workloadApiSocketPath | string | `"/spiffe-workload-api/spire-agent.sock"` | Path to the SPIFFE Workload API socket mounted into sandbox pods. | +| server.runtimeConfig.enabled | bool | `false` | Mount a runtime settings file and configure the gateway to watch it. | +| server.runtimeConfig.existingConfigMap | string | `""` | Existing ConfigMap containing the runtime settings file. When empty and runtimeConfig.enabled=true, the chart renders one from settings. 
| +| server.runtimeConfig.filename | string | `"runtime.toml"` | Runtime settings filename inside the ConfigMap. | +| server.runtimeConfig.settings | object | `{}` | Gateway runtime settings rendered under [openshell.runtime.settings]. Values must match registered runtime setting types. | | server.sandboxImage | string | `"ghcr.io/nvidia/openshell-community/sandboxes/base:latest"` | Default sandbox image used when requests do not specify one. | | server.sandboxImagePullPolicy | string | `""` | Kubernetes imagePullPolicy for sandbox pods. Empty = Kubernetes default (Always for :latest, IfNotPresent otherwise). Set to "Always" for dev clusters so new images are picked up without manual eviction. | | server.sandboxImagePullSecrets | list | `[]` | Image pull secrets attached to sandbox pods. Referenced Secrets must exist in the sandbox namespace. | diff --git a/deploy/helm/openshell/templates/_gateway-workload.tpl b/deploy/helm/openshell/templates/_gateway-workload.tpl index 5931047e5..47ea07abd 100644 --- a/deploy/helm/openshell/templates/_gateway-workload.tpl +++ b/deploy/helm/openshell/templates/_gateway-workload.tpl @@ -77,6 +77,11 @@ spec: - name: gateway-config mountPath: /etc/openshell readOnly: true + {{- if eq (include "openshell.runtimeConfigEnabled" .) "true" }} + - name: runtime-config + mountPath: /etc/openshell-runtime + readOnly: true + {{- end }} - name: sandbox-jwt mountPath: /etc/openshell-jwt readOnly: true @@ -136,6 +141,11 @@ spec: - name: gateway-config configMap: name: {{ include "openshell.fullname" . }}-config + {{- if eq (include "openshell.runtimeConfigEnabled" .) "true" }} + - name: runtime-config + configMap: + name: {{ include "openshell.runtimeConfigMapName" . }} + {{- end }} - name: sandbox-jwt secret: secretName: {{ include "openshell.sandboxJwtSecretName" . 
}} diff --git a/deploy/helm/openshell/templates/_helpers.tpl b/deploy/helm/openshell/templates/_helpers.tpl index 30c027576..2c66e668f 100644 --- a/deploy/helm/openshell/templates/_helpers.tpl +++ b/deploy/helm/openshell/templates/_helpers.tpl @@ -109,6 +109,34 @@ Name of the Secret holding gateway-minted sandbox JWT signing material. {{- .Values.server.sandboxJwt.signingSecretName | default (printf "%s-jwt-keys" (include "openshell.fullname" .)) -}} {{- end }} +{{/* +Whether the gateway runtime settings ConfigMap mount is enabled. +*/}} +{{- define "openshell.runtimeConfigEnabled" -}} +{{- $runtime := .Values.server.runtimeConfig | default dict -}} +{{- if (get $runtime "enabled" | default false) -}}true{{- else -}}false{{- end -}} +{{- end }} + +{{/* +Runtime settings ConfigMap name. +*/}} +{{- define "openshell.runtimeConfigMapName" -}} +{{- $runtime := .Values.server.runtimeConfig | default dict -}} +{{- get $runtime "existingConfigMap" | default (printf "%s-runtime-config" (include "openshell.fullname" .)) -}} +{{- end }} + +{{/* +Runtime settings filename and mount path. +*/}} +{{- define "openshell.runtimeConfigFilename" -}} +{{- $runtime := .Values.server.runtimeConfig | default dict -}} +{{- get $runtime "filename" | default "runtime.toml" -}} +{{- end }} + +{{- define "openshell.runtimeConfigPath" -}} +{{- printf "/etc/openshell-runtime/%s" (include "openshell.runtimeConfigFilename" .) -}} +{{- end }} + {{/* gRPC endpoint sandbox pods use to call back into the gateway. An explicit .Values.server.grpcEndpoint is used verbatim. Otherwise it is derived from @@ -178,4 +206,11 @@ Validate chart values that Helm would otherwise accept silently. 
{{- if and (eq $workloadKind "statefulset") (gt $replicaCount 1) (not (get $workload "allowMultiReplicaStatefulSet" | default false)) -}} {{- fail "replicaCount > 1 with workload.kind=statefulset requires workload.allowMultiReplicaStatefulSet=true; use workload.kind=deployment for external database-backed multi-replica gateways." -}} {{- end -}} +{{- $runtime := .Values.server.runtimeConfig | default dict -}} +{{- if (get $runtime "enabled" | default false) -}} +{{- $filename := get $runtime "filename" | default "runtime.toml" -}} +{{- if or (eq $filename "") (contains "/" $filename) -}} +{{- fail "server.runtimeConfig.filename must be a non-empty filename, not a path." -}} +{{- end -}} +{{- end -}} {{- end }} diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index 7037be88f..a5c8f59e2 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -32,6 +32,9 @@ data: metrics_bind_address = "0.0.0.0:{{ .Values.service.metricsPort }}" {{- end }} log_level = {{ .Values.server.logLevel | quote }} + {{- if eq (include "openshell.runtimeConfigEnabled" .) "true" }} + runtime_config_path = {{ include "openshell.runtimeConfigPath" . | quote }} + {{- end }} sandbox_namespace = {{ include "openshell.sandboxNamespace" . | quote }} default_image = {{ .Values.server.sandboxImage | quote }} supervisor_image = {{ include "openshell.supervisorImage" . | quote }} diff --git a/deploy/helm/openshell/templates/runtime-config.yaml b/deploy/helm/openshell/templates/runtime-config.yaml new file mode 100644 index 000000000..b80501903 --- /dev/null +++ b/deploy/helm/openshell/templates/runtime-config.yaml @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +{{- if and (eq (include "openshell.runtimeConfigEnabled" .) 
"true") (not .Values.server.runtimeConfig.existingConfigMap) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "openshell.runtimeConfigMapName" . }} + labels: + {{- include "openshell.labels" . | nindent 4 }} +data: + {{ include "openshell.runtimeConfigFilename" . }}: | + [openshell.runtime.settings] + {{- range $key, $value := (.Values.server.runtimeConfig.settings | default dict) }} + {{ $key }} = {{ $value | toJson }} + {{- end }} +{{- end }} diff --git a/deploy/helm/openshell/tests/runtime_config_test.yaml b/deploy/helm/openshell/tests/runtime_config_test.yaml new file mode 100644 index 000000000..7393db016 --- /dev/null +++ b/deploy/helm/openshell/tests/runtime_config_test.yaml @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +suite: gateway runtime config +templates: + - templates/runtime-config.yaml + - templates/gateway-config.yaml + - templates/statefulset.yaml +release: + name: openshell + namespace: my-namespace + +tests: + - it: does not render a runtime config map by default + template: templates/runtime-config.yaml + asserts: + - hasDocuments: + count: 0 + + - it: omits runtime_config_path by default + template: templates/gateway-config.yaml + asserts: + - notMatchRegex: + path: data["gateway.toml"] + pattern: 'runtime_config_path\s*=' + + - it: renders a runtime config map from values + template: templates/runtime-config.yaml + set: + server.runtimeConfig.enabled: true + server.runtimeConfig.settings.providers_v2_enabled: true + server.runtimeConfig.settings.proposal_approval_mode: auto + asserts: + - hasDocuments: + count: 1 + - equal: + path: metadata.name + value: openshell-runtime-config + - matchRegex: + path: data["runtime.toml"] + pattern: 'providers_v2_enabled\s*=\s*true' + - matchRegex: + path: data["runtime.toml"] + pattern: 'proposal_approval_mode\s*=\s*"auto"' + + - it: renders runtime_config_path when runtime 
config is enabled + template: templates/gateway-config.yaml + set: + server.runtimeConfig.enabled: true + asserts: + - matchRegex: + path: data["gateway.toml"] + pattern: 'runtime_config_path\s*=\s*"/etc/openshell-runtime/runtime.toml"' + + - it: mounts the rendered runtime config map + template: templates/statefulset.yaml + set: + server.runtimeConfig.enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: runtime-config + mountPath: /etc/openshell-runtime + readOnly: true + - contains: + path: spec.template.spec.volumes + content: + name: runtime-config + configMap: + name: openshell-runtime-config + + - it: uses an existing runtime config map without rendering one + template: templates/runtime-config.yaml + set: + server.runtimeConfig.enabled: true + server.runtimeConfig.existingConfigMap: precreated-runtime + asserts: + - hasDocuments: + count: 0 + + - it: mounts an existing runtime config map + template: templates/statefulset.yaml + set: + server.runtimeConfig.enabled: true + server.runtimeConfig.existingConfigMap: precreated-runtime + asserts: + - contains: + path: spec.template.spec.volumes + content: + name: runtime-config + configMap: + name: precreated-runtime + + - it: rejects runtime config filenames that include paths + template: templates/statefulset.yaml + set: + server.runtimeConfig.enabled: true + server.runtimeConfig.filename: nested/runtime.toml + asserts: + - failedTemplate: + errorMessage: "server.runtimeConfig.filename must be a non-empty filename, not a path." diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index d7ff8b257..0e4714bb9 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -218,6 +218,17 @@ server: # -- gRPC rate-limit window length in seconds. Must be positive (alongside # requests) to enable rate limiting; 0 (default) disables it. 
windowSeconds: 0 + runtimeConfig: + # -- Mount a runtime settings file and configure the gateway to watch it. + enabled: false + # -- Existing ConfigMap containing the runtime settings file. When empty + # and runtimeConfig.enabled=true, the chart renders one from settings. + existingConfigMap: "" + # -- Runtime settings filename inside the ConfigMap. + filename: runtime.toml + # -- Gateway runtime settings rendered under [openshell.runtime.settings]. + # Values must match registered runtime setting types. + settings: {} auth: # -- UNSAFE: accept unauthenticated CLI/user requests as a local developer # principal. Intended only for trusted local Skaffold/k3d development or a diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index ff4542136..1de1bacf6 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -8,7 +8,9 @@ keywords: "Generative AI, Cybersecurity, AI Agents, Sandboxing, Gateway, Configu position: 5 --- -The OpenShell gateway reads its configuration from a TOML file when `--config` or `OPENSHELL_GATEWAY_CONFIG` is set. When neither is set, the gateway reads `$XDG_CONFIG_HOME/openshell/gateway.toml` if that file exists. If no config file exists, the gateway starts from built-in defaults. Gateway process flags and gateway `OPENSHELL_*` environment variables override the file. Compute driver settings live in the driver TOML tables. See [RFC 0003](https://github.com/NVIDIA/OpenShell/blob/main/rfc/0003-gateway-configuration/README.md) for the full schema. +The OpenShell gateway reads its startup configuration from a TOML file when `--config` or `OPENSHELL_GATEWAY_CONFIG` is set. When neither is set, the gateway reads `$XDG_CONFIG_HOME/openshell/gateway.toml` if that file exists. If no config file exists, the gateway starts from built-in defaults. Gateway process flags and gateway `OPENSHELL_*` environment variables override the file. Compute driver settings live in the driver TOML tables. 
See [RFC 0003](https://github.com/NVIDIA/OpenShell/blob/main/rfc/0003-gateway-configuration/README.md) for the full schema. + +Startup configuration is distinct from runtime settings such as `providers_v2_enabled` and `ocsf_json_enabled`. Manage runtime settings with `openshell settings set --global`, or point `runtime_config_path` at a runtime settings file that the gateway loads and watches. ## Source Precedence @@ -39,6 +41,8 @@ version = 1 [openshell.gateway] # ... gateway-wide settings ... +# Optional watched runtime settings file. +runtime_config_path = "/etc/openshell/runtime.toml" [openshell.gateway.tls] # ... gateway listener TLS ... @@ -103,6 +107,10 @@ guest_tls_key = "/etc/openshell/certs/client-key.pem" grpc_rate_limit_requests = 120 grpc_rate_limit_window_seconds = 60 +# Optional runtime settings file. The gateway loads this file at startup and +# watches it for updates. +runtime_config_path = "/etc/openshell/runtime.toml" + # Gateway listener TLS (distinct from the per-driver guest_tls_*). [openshell.gateway.tls] cert_path = "/etc/openshell/certs/gateway.pem" @@ -142,6 +150,23 @@ Local Docker, Podman, and VM gateways can also set `[openshell.gateway.mtls_auth `image_pull_policy` is intentionally not a shared gateway key. Kubernetes and Docker use `Always`, `IfNotPresent`, or `Never`. Podman uses `always`, `missing`, `never`, or `newer`. Set it inside the relevant driver table. +## Runtime Settings File + +`runtime_config_path` points at a separate TOML file for gateway-global runtime settings. The runtime settings file is not Kubernetes-specific; any gateway deployment can use it. Kubernetes deployments typically mount it from a ConfigMap so operators can enable settings such as providers v2 during Helm install or upgrade. + +```toml +[openshell.runtime.settings] +providers_v2_enabled = true +ocsf_json_enabled = true +proposal_approval_mode = "manual" +``` + +Only registered runtime setting keys are accepted. 
The gateway rejects unknown keys, the reserved `policy` key, values with the wrong TOML type, and invalid enum strings. + +When a runtime settings file is configured, keys present in that file are file-managed. `openshell settings set --global` and `openshell settings delete --global` reject changes to those keys; update the file instead. Keys omitted from the file remain managed through the normal global settings API. Removing a key from the file ends file management of that key but does not delete its last persisted global value. + +The gateway fails to start if the configured runtime settings file cannot be read or parsed. After startup, invalid reloads are logged and ignored; the gateway keeps the last valid persisted settings. + ## Driver References Each example is a complete TOML file for one compute driver. The examples repeat `[openshell]` and `[openshell.gateway]` so they stay copyable, and the driver tables list the accepted driver-specific keys. Driver-specific values override inherited gateway defaults. The gateway rejects unknown driver fields after inheritance is merged.