diff --git a/.changeset/bump-clickhouse-operator-v0.0.6.md b/.changeset/bump-clickhouse-operator-v0.0.6.md new file mode 100644 index 0000000..228bf81 --- /dev/null +++ b/.changeset/bump-clickhouse-operator-v0.0.6.md @@ -0,0 +1,5 @@ +--- +"helm-charts": patch +--- + +chore(deps): bump clickhouse-operator-helm to v0.0.6 diff --git a/.changeset/clickhouse-explicit-resources.md b/.changeset/clickhouse-explicit-resources.md new file mode 100644 index 0000000..04e89c4 --- /dev/null +++ b/.changeset/clickhouse-explicit-resources.md @@ -0,0 +1,21 @@ +--- +"helm-charts": patch +--- + +fix(clickhouse): harden ClickHouse defaults for clickhouse-operator v0.0.6 + +Two operator-default changes in v0.0.6 broke the single-replica ClickHouse +deployment; the chart now overrides both: + +- Explicit `containerTemplate.resources` (2Gi memory, 500m CPU request). The + operator otherwise applies a 512Mi default (request == limit as of v0.0.6), + which is too low for the full ClickStack schema and OOMKills the server + (exit 137) under ingestion plus background merges. + +- `settings.enableDatabaseSync: false`. The operator now defaults this to true, + which creates the `default` database with the Replicated (DatabaseReplicated) + engine so table metadata lives in Keeper. That feature targets multi-replica + clusters; in a single-replica deployment a transient Keeper hiccup during + startup desyncs the Replicated database and silently drops all seeded tables, + which never come back. Keeping `default` Atomic stores tables on the + persistent data volume so they survive restarts. 
diff --git a/charts/clickstack-operators/Chart.lock b/charts/clickstack-operators/Chart.lock index 469e93c..8d12391 100644 --- a/charts/clickstack-operators/Chart.lock +++ b/charts/clickstack-operators/Chart.lock @@ -4,6 +4,6 @@ dependencies: version: 1.7.0 - name: clickhouse-operator-helm repository: oci://ghcr.io/clickhouse - version: 0.0.2 -digest: sha256:1daf572004da83b1836c8867f11198530652fee6905d4786a2d5eef87bc611cd -generated: "2026-03-04T16:52:51.068188-06:00" + version: 0.0.6 +digest: sha256:5afcb0d78e0ceecf1a18f3f7dfb52ee2627b7acea2621ffa411ee7bfb530adf7 +generated: "2026-06-19T17:41:30.214406+02:00" diff --git a/charts/clickstack-operators/Chart.yaml b/charts/clickstack-operators/Chart.yaml index 86cc5f1..0b6ceee 100644 --- a/charts/clickstack-operators/Chart.yaml +++ b/charts/clickstack-operators/Chart.yaml @@ -13,6 +13,6 @@ dependencies: repository: https://mongodb.github.io/helm-charts alias: mongodb-operator - name: clickhouse-operator-helm - version: "~0.0.2" + version: "~0.0.6" repository: oci://ghcr.io/clickhouse alias: clickhouse-operator diff --git a/charts/clickstack-operators/values.yaml b/charts/clickstack-operators/values.yaml index 85fd44f..701bfd5 100644 --- a/charts/clickstack-operators/values.yaml +++ b/charts/clickstack-operators/values.yaml @@ -9,8 +9,8 @@ mongodb-operator: # See https://clickhouse.com/docs/clickhouse-operator/overview for all options clickhouse-operator: webhook: - enable: false + enabled: false certManager: - enable: false + enabled: false crd: - enable: true + enabled: true diff --git a/charts/clickstack/tests/clickhouse-deployment_test.yaml b/charts/clickstack/tests/clickhouse-deployment_test.yaml index ceef006..b8e252c 100644 --- a/charts/clickstack/tests/clickhouse-deployment_test.yaml +++ b/charts/clickstack/tests/clickhouse-deployment_test.yaml @@ -68,6 +68,20 @@ tests: path: spec.dataVolumeClaimSpec.resources.requests.storage value: 10Gi + - it: should set explicit container resources so the operator default does not OOMKill ClickHouse + templates: + - clickhouse/cluster.yaml + asserts: + - equal: + path: spec.containerTemplate.resources.requests.memory + value: 2Gi + - equal: + path: spec.containerTemplate.resources.limits.memory + value: 2Gi + - equal: + path: spec.containerTemplate.resources.requests.cpu + value: 500m + - it: should resolve keeperClusterRef template expression templates: - clickhouse/cluster.yaml @@ -152,3 +166,11 @@ tests: path: spec.settings.extraUsersConfig.users.app - isNotNull: path: spec.settings.extraUsersConfig.users.otelcollector + + - it: should disable operator databaseSync so the default DB stays Atomic + templates: + - clickhouse/cluster.yaml + asserts: + - equal: + path: spec.settings.enableDatabaseSync + value: false diff --git a/charts/clickstack/values.yaml b/charts/clickstack/values.yaml index edd375c..6c296ec 100644 --- a/charts/clickstack/values.yaml +++ b/charts/clickstack/values.yaml @@ -328,6 +328,12 @@ clickhouse: image: repository: clickhouse/clickhouse-server tag: "25.7-alpine" + resources: + requests: + cpu: 500m + memory: 2Gi + limits: + memory: 2Gi replicas: 1 shards: 1 keeperClusterRef: @@ -339,6 +345,9 @@ clickhouse: requests: storage: 10Gi settings: + # Keep the `default` database Atomic; the Replicated engine the operator + # selects when this is true drops seeded tables on Keeper desync. + enableDatabaseSync: false extraUsersConfig: users: app: diff --git a/integration-tests/full-stack/assert.sh b/integration-tests/full-stack/assert.sh index 2aaf481..579ea0f 100755 --- a/integration-tests/full-stack/assert.sh +++ b/integration-tests/full-stack/assert.sh @@ -12,7 +12,7 @@ echo "Waiting for services to initialize..." sleep 30 echo "Waiting for all pods to be ready..." 
-kubectl wait --for=condition=Ready pods --all --timeout=600s || true +kubectl wait --for=condition=Ready pods --all --field-selector=status.phase!=Succeeded --timeout=600s || true echo "Pod status:" kubectl get pods -o wide @@ -24,7 +24,7 @@ echo "Checking ClickHouseCluster CR..." kubectl get clickhousecluster -o wide || true echo "Waiting for all pods to be ready (final check)..." -kubectl wait --for=condition=Ready pods --all --timeout=600s +kubectl wait --for=condition=Ready pods --all --field-selector=status.phase!=Succeeded --timeout=600s echo "Final pod status:" kubectl get pods -o wide