diff --git a/k8s/bases/infrastructure/controllers/openbao/helm-release.yaml b/k8s/bases/infrastructure/controllers/openbao/helm-release.yaml index b9658d150..e5af4c61c 100644 --- a/k8s/bases/infrastructure/controllers/openbao/helm-release.yaml +++ b/k8s/bases/infrastructure/controllers/openbao/helm-release.yaml @@ -63,7 +63,21 @@ spec: size: ${openbao_storage_size:=1Gi} auditStorage: enabled: true - size: 1Gi + # 10Gi (up from the chart default 1Gi). The file audit backend + # does not rotate, and OpenBao FAILS CLOSED on audit-write + # errors (every API request blocks once the volume is full). + # 10Gi gives multi-year headroom for this cluster's request + # volume (~700 KB/day from current ESO + vault-snapshot + # traffic). Once the observability stack lands, promtail will + # ship the stream off-PVC, but audit.log itself keeps growing. + # Keep monitoring 'kubelet_volume_stats_available_bytes' + # on the openbao-audit-* PVC and rotate manually via + # kubectl -n openbao exec openbao-0 -- sh -c \ + # 'mv /openbao/audit/audit.log /openbao/audit/audit.log.$(date -u +%Y%m%dT%H%M%SZ) && \ + # kill -HUP 1' + # (OpenBao reopens its audit FD on SIGHUP, so the move + + # signal pattern is safe; delete the rotated file to free space.) + size: ${openbao_audit_storage_size:=10Gi} standalone: enabled: true config: | @@ -85,6 +99,34 @@ spec: storage "file" { path = "/openbao/data" } + + # Declarative file audit device on the auditStorage PV. OpenBao + # blocks runtime audit enables: `bao audit enable file …` + # returns "cannot enable audit device via API; use declarative, + # config-based audit device management instead", so the device + # MUST be declared here in HCL. The mount path /openbao/audit + # is the chart's auditStorage default. Every API request is + # written to /openbao/audit/audit.log as one JSON record per + # line; tail it from the openbao pod today, ship via promtail + # once the observability stack lands.
+ # + # HCL shape (per https://openbao.org/docs/configuration/audit): + # audit "<name>" { + # type = "<type>" + # path = "<path>" # what /sys/audit/<path>/ becomes + # options = { ... backend-specific options ... } + # } + # Both `type` and `path` are required at parse time. `type` + # selects the backend; `path` is what `bao audit list` reports + # and what API requests address. We use "file/" so the device + # is reachable at /sys/audit/file/. + audit "file" { + type = "file" + path = "file/" + options = { + file_path = "/openbao/audit/audit.log" + } + } topologySpreadConstraints: - maxSkew: 1 topologyKey: kubernetes.io/hostname diff --git a/k8s/bases/infrastructure/vault-config/job.yaml b/k8s/bases/infrastructure/vault-config/job.yaml index a69d3f532..cf541016b 100644 --- a/k8s/bases/infrastructure/vault-config/job.yaml +++ b/k8s/bases/infrastructure/vault-config/job.yaml @@ -10,6 +10,12 @@ # 3. Configures Kubernetes auth (in-cluster auto-discovery for CA + token reviewer) # 4. Creates least-privilege policies # 5. Creates auth roles mapping ServiceAccounts to policies +# 6. Configures OIDC auth (Dex) for human admin access +# 7. Configures the Database secrets engine for fleetdm MySQL rotation +# +# The file audit device is declared in the openbao HelmRelease config +# (declarative-only — OpenBao rejects runtime audit enables via API), +# so no runtime step is needed here for auditing. # # On fresh install the init containers auto-initialize the vault and create # the openbao-unseal Secret. On Velero restore the Secret and PVC are both @@ -450,6 +456,26 @@ spec: echo "It will be configured on the next vault-config reconciliation." fi + # NOTE: the file audit device used to be enabled here at + # runtime. OpenBao rejects that path -- `bao audit enable` + # returns "cannot enable audit device via API; use + # declarative, config-based audit device management + # instead".
The audit device is now declared in the + # openbao HelmRelease's standalone.config HCL, alongside + # listener and storage: + # + # audit "file" { + # type = "file" + # path = "file/" + # options = { + # file_path = "/openbao/audit/audit.log" + # } + # } + # + # See https://openbao.org/docs/configuration/audit for the + # full shape (both `type` and `path` are required). No + # runtime step is needed for OpenBao to start auditing. + # --- 7. Database secrets engine: fleetdm MySQL static-role rotation --- # OpenBao owns and periodically rotates the 'fleet' MySQL user's # password; ESO reads the current value via the VaultDynamicSecret @@ -483,10 +509,18 @@ spec: # re-rotating on every idempotent Job re-run. App user only, # never root. if ! bao read database/static-roles/fleet >/dev/null 2>&1; then + # Default rotation: 168h (7 days). Was 2160h (90 days), + # which let a leaked credential stay valid up to 90 days. + # 7 days matches HashiCorp's documented sweet spot for + # interactive workloads (long enough that cache misses + # are rare, short enough that a leak is bounded). + # Overridable via the fleetdm_mysql_rotation_period + # cluster variable for fork operators with different + # constraints. Only applied when the role is first created. bao write database/static-roles/fleet \ db_name=fleetdm-mysql \ username="fleet" \ - rotation_period="${fleetdm_mysql_rotation_period:=2160h}" + rotation_period="${fleetdm_mysql_rotation_period:=168h}" echo "fleetdm MySQL static role created." else echo "fleetdm MySQL static role already exists."