Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions k8s/bases/infrastructure/controllers/cilium/helm-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ spec:
enabled: false
operator:
replicas: ${cilium_replicas:=2}
resources:
requests:
cpu: 100m
memory: 256Mi
podDisruptionBudget:
enabled: true
minAvailable: 1
Expand All @@ -94,6 +98,24 @@ spec:
ipam:
mode: kubernetes
kubeProxyReplacement: true
# ------------------------------------------------------------------
# Resource requests for the agent DaemonSet and the standalone
# cilium-envoy DaemonSet. These promote the pods out of BestEffort
# QoS so they survive node memory pressure; an OOMKilled cilium-agent
# leaves BPF state degraded and the node loses ClusterIP routing
# (observed cascading into ~13 workload crash-loops on prod-worker-2,
# 2026-05-28). Limits intentionally unset — Cilium recommends against
# capping the agent (https://docs.cilium.io/en/stable/operations/performance/).
# ------------------------------------------------------------------
resources:
requests:
cpu: 200m
memory: 512Mi
envoy:
resources:
requests:
cpu: 50m
memory: 128Mi
# Transparent WireGuard encryption for all pod-to-pod and node-to-node
# traffic. KubeSpan (Talos-layer WireGuard between nodes) is not
# enabled in this cluster, so without this setting inter-node pod
Expand All @@ -118,8 +140,23 @@ spec:
install:
namespace: kube-system
existingNamespace: true
# Resource requests promote spire-server and spire-agent pods
# out of BestEffort QoS. cilium-agent's SPIRE Delegate API
# client relies on the per-node spire-agent admin socket — if
# spire-agent is evicted/OOMKilled, the cilium-agent on that
# node stays stuck retrying "SPIRE admin socket does not
# exist" and ClusterIP routing degrades alongside it.
agent:
resources:
requests:
cpu: 50m
memory: 128Mi
# TODO: Remove workaround when SPIRE no longer fails to start (https://github.com/cilium/cilium/issues/40533)
server:
resources:
requests:
cpu: 50m
memory: 128Mi
initContainers:
- command:
- /bin/sh
Expand Down
Loading