diff --git a/k8s/bases/apps/actual-budget/helm-release.yaml b/k8s/bases/apps/actual-budget/helm-release.yaml index c6582230e..6dcc3ebcf 100644 --- a/k8s/bases/apps/actual-budget/helm-release.yaml +++ b/k8s/bases/apps/actual-budget/helm-release.yaml @@ -49,6 +49,23 @@ spec: matchLabels: app.kubernetes.io/name: actualbudget app.kubernetes.io/instance: actual-budget + # Chart renders no startupProbe; values can override the + # liveness/readiness blocks below but cannot add a startupProbe. + # Patch one in so liveness/readiness stay gated until the + # container is actually serving HTTP. initialDelaySeconds (20s) + # skips past the ~10s cold-start window so the first probe lands + # on a serving container — zero failure events during the + # merge-queue's 90s steady-state Warning check. + - op: add + path: /spec/template/spec/containers/0/startupProbe + value: + httpGet: + path: / + port: http + initialDelaySeconds: 20 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 12 # 12 x 5s period = 60s grace beyond initial delay # https://github.com/community-charts/helm-charts/blob/main/charts/actualbudget/values.yaml values: replicaCount: ${actual_budget_replicas:=1} diff --git a/k8s/bases/apps/headlamp/helm-release.yaml b/k8s/bases/apps/headlamp/helm-release.yaml index 0dc9c7d9f..0ec11876d 100644 --- a/k8s/bases/apps/headlamp/helm-release.yaml +++ b/k8s/bases/apps/headlamp/helm-release.yaml @@ -71,6 +71,26 @@ spec: value: name: tmp-dir mountPath: /tmp + # Chart hardcodes liveness/readiness with K8s defaults + # (timeoutSeconds: 1, failureThreshold: 3, periodSeconds: 10). + # Headlamp is KEDA-scaled to 0 in prod; every cold start logs + # 1-3 Unhealthy probe warnings while the Go binary initialises. + # Add a startupProbe so liveness/readiness are gated until the + # main container is actually serving. initialDelaySeconds skips + # past the cold-start window so the first probe lands on a + # serving container — zero failure events during the + # merge-queue's 90s steady-state Warning check. NOTE(review): + # readiness cannot pass before 20s on scale-from-zero; confirm. 
+ - op: add + path: /spec/template/spec/containers/0/startupProbe + value: + httpGet: + path: / + port: http + initialDelaySeconds: 20 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 12 # 12 x 5s period = 60s grace beyond initial delay - target: kind: Deployment name: headlamp diff --git a/k8s/bases/apps/homepage/helm-release.yaml b/k8s/bases/apps/homepage/helm-release.yaml index 5364eed8d..a28c09dd5 100644 --- a/k8s/bases/apps/homepage/helm-release.yaml +++ b/k8s/bases/apps/homepage/helm-release.yaml @@ -52,6 +52,28 @@ spec: matchLabels: app.kubernetes.io/name: homepage app.kubernetes.io/instance: homepage + # Chart hardcodes liveness/readiness with K8s defaults + # (timeoutSeconds: 1, failureThreshold: 3, periodSeconds: 10). + # Homepage takes ~13s to start serving on a fresh pod, so each + # rollout produces 3 Unhealthy probe warnings per pod and leaves + # only ~17s of headroom before the liveness restart fires. Add + # a startupProbe to gate liveness/readiness during initial boot. + # + # initialDelaySeconds (20s) is past the observed ~13s cold start so + # the first probe lands on a serving container — zero failure + # events during a normal rollout (the merge-queue's 90s + # steady-state Warning check would otherwise count probe + # failures fired in that window). + - op: add + path: /spec/template/spec/containers/0/startupProbe + value: + httpGet: + path: / + port: http + initialDelaySeconds: 20 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 12 # 12 x 5s period = 60s grace beyond initial delay # Flicker fix: with replicaCount=2 and the chart's default ClusterIP # service, the Kubernetes widget's poll for node/cluster CPU/memory # round-robins between two pods that each maintain their own