From fecd6bd966be18c062d7958660928b12f424edce Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 21 May 2026 12:57:49 -0700 Subject: [PATCH 1/4] feat(node): track StatefulSet on Status, switch to typed Get+Update sync, pause scales to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SeiNode controller now owns the StatefulSet lifecycle continuously via a typed Get/Create/Update path, with the resulting object's {Name, UID} recorded on Status.StatefulSet so subsequent reconciles fetch and mutate the tracked identity directly rather than blindly re-applying. Lifecycle: - First reconcile (Status.StatefulSet=nil): adopt an existing StatefulSet with the expected name if one exists; otherwise create one. Record {Name, UID}. - Tracked reconciles: fetch by name; UID mismatch (out-of-band recreation) triggers Delete+Create+re-track; NotFound (manual delete) triggers Create+re-track. - Replicas: Spec.Paused=true forces Replicas=0; the controller is the authoritative writer. The apply-statefulset plan task and the controller both call the new noderesource.SyncStatefulSet helper, so there is one source of truth for STS lifecycle. The task drops its server-side-apply pattern and the corresponding nolint comment (which flagged the migration to a typed model as a deferred effort). Pause-scales-to-zero is exercised end-to-end: SeiNode.Spec.Paused flips the live StatefulSet's replicas to 0; clearing the field restores them. Coverage: - TestSeiNode_StatefulSetTrackedOnStatus — Status.StatefulSet recorded with {Name, UID} on first reconcile. - TestSeiNode_Paused_ScalesStatefulSetToZero — pause drops StatefulSet.Spec.Replicas to 0, unpause restores to 1. - TestSeiNode_StatefulSetRecreatedAfterDelete — out-of-band STS deletion is reconciled and the new UID is re-recorded. Co-Authored-By: Claude Opus 4.7 --- api/v1alpha1/seinode_types.go | 21 +++ config/crd/sei.io_seinodes.yaml | 21 +++ internal/controller/node/controller.go | 4 + internal/controller/node/statefulset.go | 29 +++ .../envtest/seinode_statefulset_test.go | 170 ++++++++++++++++++ internal/noderesource/sync.go | 129 +++++++++++++ internal/task/apply_statefulset.go | 20 +-- manifests/sei.io_seinodes.yaml | 21 +++ 8 files changed, 397 insertions(+), 18 deletions(-) create mode 100644 internal/controller/node/statefulset.go create mode 100644 internal/controller/nodedeployment/envtest/seinode_statefulset_test.go create mode 100644 internal/noderesource/sync.go diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 33bbf31..ef36049 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -3,6 +3,7 @@ package v1alpha1 import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ) // SeiNodeSpec defines the desired state of a standalone Sei node. @@ -343,6 +344,26 @@ type SeiNodeStatus struct { // config so the node advertises a reachable address for gossip discovery. // +optional ExternalAddress string `json:"externalAddress,omitempty"` + + // StatefulSet references the StatefulSet the controller created for + // this SeiNode. UID is the identity check: an STS with the expected + // name but a different UID is not the one this controller created + // (e.g., manual recreation out-of-band) and triggers replacement. + // +optional + StatefulSet *StatefulSetRef `json:"statefulSet,omitempty"` +} + +// StatefulSetRef identifies a StatefulSet owned and managed by a +// SeiNode. Stored on Status so the controller can fetch and mutate the +// owned object directly rather than blindly server-side-applying. +type StatefulSetRef struct { + // Name of the StatefulSet (always equals the SeiNode name). + Name string `json:"name"` + + // UID of the StatefulSet. Used to detect out-of-band recreation: + // if a new StatefulSet appears with the same name but a different + // UID, the controller knows it is not the one it created. + UID types.UID `json:"uid"` } // +kubebuilder:object:root=true diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index b887725..07d4696 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -950,6 +950,27 @@ spec: items: type: string type: array + statefulSet: + description: |- + StatefulSet references the StatefulSet the controller created for + this SeiNode. UID is the identity check: an STS with the expected + name but a different UID is not the one this controller created + (e.g., manual recreation out-of-band) and triggers replacement. + properties: + name: + description: Name of the StatefulSet (always equals the SeiNode + name). + type: string + uid: + description: |- + UID of the StatefulSet. Used to detect out-of-band recreation: + if a new StatefulSet appears with the same name but a different + UID, the controller knows it is not the one it created. + type: string + required: + - name + - uid + type: object type: object type: object served: true diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 77683d6..3d98b57 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -112,6 +112,10 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } + if err := r.reconcileStatefulSet(ctx, node); err != nil { + return ctrl.Result{}, fmt.Errorf("reconciling statefulset: %w", err) + } + if node.Spec.Paused { if err := flushStatus(); err != nil { return ctrl.Result{}, fmt.Errorf("flushing paused status: %w", err) diff --git a/internal/controller/node/statefulset.go b/internal/controller/node/statefulset.go new file mode 100644 index 0000000..118119d --- /dev/null +++ b/internal/controller/node/statefulset.go @@ -0,0 +1,29 @@ +package node + +import ( + "context" + "fmt" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" +) + +// reconcileStatefulSet syncs the owned StatefulSet via the typed +// Get+Create/Update helper and records the resulting object on +// Status.StatefulSet so subsequent reconciles fetch the tracked +// identity directly. +func (r *SeiNodeReconciler) reconcileStatefulSet(ctx context.Context, node *seiv1alpha1.SeiNode) error { + sts, err := noderesource.SyncStatefulSet(ctx, r.Client, r.Scheme, node, r.Platform) + if err != nil { + return fmt.Errorf("syncing statefulset: %w", err) + } + if node.Status.StatefulSet == nil || + node.Status.StatefulSet.UID != sts.UID || + node.Status.StatefulSet.Name != sts.Name { + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: sts.Name, + UID: sts.UID, + } + } + return nil +} diff --git a/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go b/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go new file mode 100644 index 0000000..b0bb968 --- /dev/null +++ b/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go @@ -0,0 +1,170 @@ +//go:build envtest + +package envtest_test + +import ( + "testing" + + . "github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/controller/nodedeployment/envtest/fixtures" +) + +// TestSeiNode_StatefulSetTrackedOnStatus asserts the controller creates +// the owned StatefulSet on first reconcile and records {Name, UID} on +// Status. Subsequent reconciles fetch and mutate the tracked object +// rather than re-applying blindly. +func TestSeiNode_StatefulSetTrackedOnStatus(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "sts-tracked", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + + waitFor(t, func() bool { + cur := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, cur); err != nil { + return false + } + return cur.Status.StatefulSet != nil && cur.Status.StatefulSet.UID != "" + }, "Status.StatefulSet must be recorded after first reconcile") + + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + ref := cur.Status.StatefulSet + g.Expect(ref.Name).To(Equal(node.Name)) + + sts := &appsv1.StatefulSet{} + g.Expect(testCli.Get(testCtx, types.NamespacedName{Name: ref.Name, Namespace: ns}, sts)).To(Succeed()) + g.Expect(sts.UID).To(Equal(ref.UID), "tracked UID must match the live StatefulSet") +} + +// TestSeiNode_Paused_ScalesStatefulSetToZero exercises the hard-pause +// behavior: setting Spec.Paused=true scales the owned StatefulSet to +// zero replicas; clearing it restores the desired replica count. +func TestSeiNode_Paused_ScalesStatefulSetToZero(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "paused-scaled", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + + // Initial reconcile creates the StatefulSet at replicas=1. + stsKey := types.NamespacedName{Name: node.Name, Namespace: ns} + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + return sts.Spec.Replicas != nil && *sts.Spec.Replicas == 1 + }, "StatefulSet must be created with replicas=1 for an unpaused SeiNode") + + // Pause. Controller scales to 0. + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + patch := client.MergeFrom(cur.DeepCopy()) + cur.Spec.Paused = true + g.Expect(testCli.Patch(testCtx, cur, patch)).To(Succeed()) + + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + return sts.Spec.Replicas != nil && *sts.Spec.Replicas == 0 + }, "StatefulSet must scale to replicas=0 after pause") + + // Unpause. Controller restores replicas. + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + patch = client.MergeFrom(cur.DeepCopy()) + cur.Spec.Paused = false + g.Expect(testCli.Patch(testCtx, cur, patch)).To(Succeed()) + + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + return sts.Spec.Replicas != nil && *sts.Spec.Replicas == 1 + }, "StatefulSet must scale back to replicas=1 after unpause") +} + +// TestSeiNode_StatefulSetRecreatedAfterDelete covers the +// out-of-band-deletion case: if an operator deletes the StatefulSet, +// the next reconcile observes the NotFound, recreates the object, and +// re-records the new UID on Status.StatefulSet. +func TestSeiNode_StatefulSetRecreatedAfterDelete(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "sts-recreated", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + + waitFor(t, func() bool { + cur := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, cur); err != nil { + return false + } + return cur.Status.StatefulSet != nil + }, "Status.StatefulSet must be recorded after first reconcile") + + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + originalUID := cur.Status.StatefulSet.UID + + sts := &appsv1.StatefulSet{} + stsKey := types.NamespacedName{Name: node.Name, Namespace: ns} + g.Expect(testCli.Get(testCtx, stsKey, sts)).To(Succeed()) + g.Expect(testCli.Delete(testCtx, sts)).To(Succeed()) + + // Bump the spec so the controller reconciles again (the SeiNode + // reconciler watches StatefulSet via Owns, but envtest sometimes + // races; a Spec edit triggers the predicate deterministically). + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + patch := client.MergeFrom(cur.DeepCopy()) + if cur.Spec.Overrides == nil { + cur.Spec.Overrides = map[string]string{} + } + cur.Spec.Overrides["chain.touch-counter"] = "1" + g.Expect(testCli.Patch(testCtx, cur, patch)).To(Succeed()) + + waitFor(t, func() bool { + s := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, s); err != nil { + return apierrors.IsNotFound(err) == false + } + // Confirm the Status ref reflects the new UID. + latest := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, latest); err != nil { + return false + } + return latest.Status.StatefulSet != nil && + latest.Status.StatefulSet.UID == s.UID && + s.UID != originalUID + }, "deleted StatefulSet must be recreated with a new UID and re-tracked on Status") +} + +func newTestSeiNode(namespace, name string, paused bool) *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "pacific-1", + Image: fixtures.DefaultImage, + FullNode: &seiv1alpha1.FullNodeSpec{}, + Paused: paused, + }, + } +} diff --git a/internal/noderesource/sync.go b/internal/noderesource/sync.go new file mode 100644 index 0000000..e4f3de6 --- /dev/null +++ b/internal/noderesource/sync.go @@ -0,0 +1,129 @@ +package noderesource + +import ( + "context" + "fmt" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" +) + +// SyncStatefulSet brings the live StatefulSet for node into line with +// the rendered desired spec using typed Get + Create/Update — no +// server-side apply, no Unstructured. Returns the resulting live +// object so callers can record its identity (Name/UID) on the SeiNode +// status. +// +// Behaviors: +// - Spec.Paused=true forces Replicas=0 in the desired render. +// - When Status.StatefulSet tracks a UID and the live object has a +// different one, the impostor is deleted and recreated (out-of- +// band recreation by an operator). +// - When the live object is absent, it is created. When present and +// mutable fields diverge, it is Updated; otherwise no apiserver +// write is issued. +// +// Status mutation is left to the caller. The controller updates +// Status.StatefulSet on first observation; the apply-statefulset task +// can ignore the return because the controller catches up on the next +// reconcile. +func SyncStatefulSet( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + node *seiv1alpha1.SeiNode, + platform PlatformConfig, +) (*appsv1.StatefulSet, error) { + desired, err := GenerateStatefulSet(node, platform) + if err != nil { + return nil, fmt.Errorf("generating statefulset: %w", err) + } + if err := ctrl.SetControllerReference(node, desired, scheme); err != nil { + return nil, fmt.Errorf("setting owner reference: %w", err) + } + if node.Spec.Paused { + desired.Spec.Replicas = ptr.To(int32(0)) + } + + existing := &appsv1.StatefulSet{} + key := client.ObjectKeyFromObject(desired) + getErr := c.Get(ctx, key, existing) + + if apierrors.IsNotFound(getErr) { + if err := c.Create(ctx, desired); err != nil { + return nil, fmt.Errorf("creating statefulset: %w", err) + } + return desired, nil + } + if getErr != nil { + return nil, fmt.Errorf("fetching statefulset: %w", getErr) + } + + if node.Status.StatefulSet != nil && existing.UID != node.Status.StatefulSet.UID { + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return nil, fmt.Errorf("deleting impostor statefulset: %w", err) + } + if err := c.Create(ctx, desired); err != nil { + return nil, fmt.Errorf("recreating statefulset after UID mismatch: %w", err) + } + return desired, nil + } + + if mutateStatefulSet(existing, desired) { + if err := c.Update(ctx, existing); err != nil { + return nil, fmt.Errorf("updating statefulset: %w", err) + } + } + return existing, nil +} + +// mutateStatefulSet copies mutable Spec fields from desired into +// existing and returns whether anything changed. Immutable fields +// (Selector, ServiceName, VolumeClaimTemplates, PodManagementPolicy) +// are intentionally left alone so a deterministic-but-not-byte- +// identical render never trips an apiserver immutability check. +func mutateStatefulSet(existing, desired *appsv1.StatefulSet) bool { + changed := false + if !int32PtrEq(existing.Spec.Replicas, desired.Spec.Replicas) { + existing.Spec.Replicas = desired.Spec.Replicas + changed = true + } + if !apiequality.Semantic.DeepEqual(existing.Spec.Template, desired.Spec.Template) { + existing.Spec.Template = desired.Spec.Template + changed = true + } + if !apiequality.Semantic.DeepEqual(existing.Spec.UpdateStrategy, desired.Spec.UpdateStrategy) { + existing.Spec.UpdateStrategy = desired.Spec.UpdateStrategy + changed = true + } + if existing.Spec.MinReadySeconds != desired.Spec.MinReadySeconds { + existing.Spec.MinReadySeconds = desired.Spec.MinReadySeconds + changed = true + } + if !apiequality.Semantic.DeepEqual(existing.Labels, desired.Labels) { + existing.Labels = desired.Labels + changed = true + } + if !apiequality.Semantic.DeepEqual(existing.Annotations, desired.Annotations) { + existing.Annotations = desired.Annotations + changed = true + } + return changed +} + +func int32PtrEq(a, b *int32) bool { + if a == nil && b == nil { + return true + } + if a == nil || b == nil { + return false + } + return *a == *b +} diff --git a/internal/task/apply_statefulset.go b/internal/task/apply_statefulset.go index 4b8c745..f547b4f 100644 --- a/internal/task/apply_statefulset.go +++ b/internal/task/apply_statefulset.go @@ -5,10 +5,6 @@ import ( "encoding/json" "fmt" - appsv1 "k8s.io/api/apps/v1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" ) @@ -48,21 +44,9 @@ func (e *applyStatefulSetExecution) Execute(ctx context.Context) error { if err != nil { return err } - - desired, err := noderesource.GenerateStatefulSet(node, e.cfg.Platform) - if err != nil { - return Terminal(fmt.Errorf("generating statefulset: %w", err)) - } - desired.SetGroupVersionKind(appsv1.SchemeGroupVersion.WithKind("StatefulSet")) - if err := ctrl.SetControllerReference(node, desired, e.cfg.Scheme); err != nil { - return fmt.Errorf("setting owner reference on statefulset: %w", err) + if _, err := noderesource.SyncStatefulSet(ctx, e.cfg.KubeClient, e.cfg.Scheme, node, e.cfg.Platform); err != nil { + return fmt.Errorf("syncing statefulset: %w", err) } - - //nolint:staticcheck // migrating to typed ApplyConfiguration is a separate effort - if err := e.cfg.KubeClient.Patch(ctx, desired, client.Apply, fieldOwner, client.ForceOwnership); err != nil { - return fmt.Errorf("applying statefulset: %w", err) - } - e.complete() return nil } diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index b887725..07d4696 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -950,6 +950,27 @@ spec: items: type: string type: array + statefulSet: + description: |- + StatefulSet references the StatefulSet the controller created for + this SeiNode. UID is the identity check: an STS with the expected + name but a different UID is not the one this controller created + (e.g., manual recreation out-of-band) and triggers replacement. + properties: + name: + description: Name of the StatefulSet (always equals the SeiNode + name). + type: string + uid: + description: |- + UID of the StatefulSet. Used to detect out-of-band recreation: + if a new StatefulSet appears with the same name but a different + UID, the controller knows it is not the one it created. + type: string + required: + - name + - uid + type: object type: object type: object served: true From e17c26eb343d238bc97743ce0bbcceec8c9cd8e7 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 21 May 2026 14:20:31 -0700 Subject: [PATCH 2/4] =?UTF-8?q?fixup:=20address=20Coral=20review=20?= =?UTF-8?q?=E2=80=94=20SSA-based=20sync,=20two-reconcile=20impostor=20reco?= =?UTF-8?q?very,=20expanded=20coverage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The K8s expert review caught a defaulting-churn bug and converged on a cleaner impostor-branch shape. Defaulting churn: - The previous mutate path used apiequality.Semantic.DeepEqual on Spec.Template, which surfaced apiserver-applied PodSpec defaults (RestartPolicy, DNSPolicy, etc.) as divergence. Every reconcile then wrote nil-defaults, the apiserver re-defaulted, the STS bumped generation, and the rollout envtests deadlocked with ReadyReplicas=0. - Switched the writer to Server-Side Apply on the typed *appsv1.StatefulSet — defaulting-immune by design. SSA only compares fields the fieldManager owns, so apiserver defaults never appear as divergence. Same fieldOwner=seinode-controller used by sibling tasks. Impostor recovery (two reconciles): - Reconcile N: detect UID mismatch, Delete the impostor, return (nil, nil) so callers requeue without writing Status. - Reconcile N+1: Owns(StatefulSet) watch fires on the delete event, the reconciler observes NotFound, the Apply creates fresh, callers stamp the new UID onto Status.StatefulSet. - Background propagation (the default) is correct: PVCs from volumeClaimTemplates don't carry BlockOwnerDeletion ownerRefs back to the STS, and PersistentVolumeClaimRetentionPolicy defaults to WhenDeleted=Retain. Foreground would add a finalizer that envtest's apiserver can't drain (no kube-controller-manager). Other review items addressed: - client.FieldOwner on Create/Update so managedFields entries unify with sibling task fieldOwner usage rather than fragmenting. - Spec.Paused doc updated: scales to Replicas=0, no effect on PhaseFailed. - Drop the re-Get after Apply: client.Patch decodes the apiserver response into desired in-place; the prior re-Get hit a cache-lag NotFound on the first Apply. Coverage: - internal/noderesource/sync_test.go (new): 6 unit tests on a fake client covering the SyncStatefulSet branches (create, adopt, update, paused-scales-to-zero, NotFound recreate, impostor delete-then-defer). - envtest TestSeiNode_StatefulSetImpostorReplaced: end-to-end exercise of the two-reconcile pattern; uses Status().Update() to forge the UID mismatch because client.MergeFrom on a pointer-typed sub-struct produces an effectively-empty patch. Co-Authored-By: Claude Opus 4.7 --- api/v1alpha1/seinode_types.go | 10 +- config/crd/sei.io_seinodedeployments.yaml | 9 +- config/crd/sei.io_seinodes.yaml | 13 +- internal/controller/node/statefulset.go | 11 + .../envtest/seinode_statefulset_test.go | 80 ++++++ internal/noderesource/sync.go | 150 ++++++------ internal/noderesource/sync_test.go | 228 ++++++++++++++++++ internal/task/apply_statefulset.go | 18 +- manifests/sei.io_seinodedeployments.yaml | 9 +- manifests/sei.io_seinodes.yaml | 13 +- 10 files changed, 442 insertions(+), 99 deletions(-) create mode 100644 internal/noderesource/sync_test.go diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index ef36049..a13b2d1 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -69,9 +69,12 @@ type SeiNodeSpec struct { Validator *ValidatorSpec `json:"validator,omitempty"` // Paused freezes reconciliation. While true, the controller does not - // advance the lifecycle, start plans, or mutate derived resources. - // In-flight tasks on the cluster run to completion but their results - // are not polled until the field is cleared. + // advance the lifecycle, start plans, or mutate derived resources + // except the owned StatefulSet — which scales to Replicas=0 so pods + // terminate. In-flight tasks on the cluster run to completion but + // their results are not polled until the field is cleared. + // Has no effect on nodes in PhaseFailed; delete and recreate to + // recover from a failed node. // +optional Paused bool `json:"paused,omitempty"` } @@ -370,6 +373,7 @@ type StatefulSetRef struct { // +kubebuilder:subresource:status // +kubebuilder:resource:shortName=snode // +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` +// +kubebuilder:printcolumn:name="StatefulSet",type=string,JSONPath=`.status.statefulSet.name`,priority=1 // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` // SeiNode is the Schema for the seinodes API. diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 08c5ef4..5052647 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -349,9 +349,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 07d4696..5e917ab 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -20,6 +20,10 @@ spec: - jsonPath: .status.phase name: Phase type: string + - jsonPath: .status.statefulSet.name + name: StatefulSet + priority: 1 + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -218,9 +222,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- diff --git a/internal/controller/node/statefulset.go b/internal/controller/node/statefulset.go index 118119d..81073bd 100644 --- a/internal/controller/node/statefulset.go +++ b/internal/controller/node/statefulset.go @@ -12,11 +12,22 @@ import ( // Get+Create/Update helper and records the resulting object on // Status.StatefulSet so subsequent reconciles fetch the tracked // identity directly. +// +// A nil StatefulSet with no error means the impostor branch fired: +// SyncStatefulSet detected a UID mismatch, issued Delete, and deferred +// the Apply to the next reconcile. Leave Status.StatefulSet untouched — +// the stale UID still points at a now-deleted object, and the next +// reconcile (triggered by the StatefulSet delete watch event) observes +// NotFound and Applies a fresh STS whose new UID gets stamped onto +// Status.StatefulSet. func (r *SeiNodeReconciler) reconcileStatefulSet(ctx context.Context, node *seiv1alpha1.SeiNode) error { sts, err := noderesource.SyncStatefulSet(ctx, r.Client, r.Scheme, node, r.Platform) if err != nil { return fmt.Errorf("syncing statefulset: %w", err) } + if sts == nil { + return nil + } if node.Status.StatefulSet == nil || node.Status.StatefulSet.UID != sts.UID || node.Status.StatefulSet.Name != sts.Name { diff --git a/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go b/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go index b0bb968..b20a194 100644 --- a/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go +++ b/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go @@ -154,6 +154,86 @@ func TestSeiNode_StatefulSetRecreatedAfterDelete(t *testing.T) { }, "deleted StatefulSet must be recreated with a new UID and re-tracked on Status") } +// TestSeiNode_StatefulSetImpostorReplaced covers the UID-mismatch +// recovery: when Status.StatefulSet points at a UID that doesn't match +// the live object, the controller deletes the impostor on one reconcile +// and Applies a fresh StatefulSet on the next. The split keeps each +// reconcile against a clean apiserver state — applying onto a +// still-deleting object has undefined SSA semantics if a finalizer +// ever blocks removal. +// +// envtest has no kube-controller-manager. That's fine for this case: +// StatefulSets carry no finalizers by default, so the Background- +// propagation Delete the controller issues commits to etcd +// immediately. No tombstone state to drain; the Owns watch fires the +// next reconcile, which sees NotFound and creates fresh. +// +// The test asserts on the terminal state — STS recreated with a new +// UID and Status.StatefulSet re-tracked — rather than on the transient +// "delete observed" state, which is unobservably short under +// Background propagation with no finalizers. +func TestSeiNode_StatefulSetImpostorReplaced(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "sts-impostor", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + stsKey := types.NamespacedName{Name: node.Name, Namespace: ns} + + waitFor(t, func() bool { + cur := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, cur); err != nil { + return false + } + return cur.Status.StatefulSet != nil && cur.Status.StatefulSet.UID != "" + }, "Status.StatefulSet recorded after first reconcile") + + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + realUID := cur.Status.StatefulSet.UID + + // Forge a UID mismatch with Status().Update — a MergeFrom patch + // on a pointer-typed sub-struct (Status.StatefulSet is *StatefulSetRef) + // can serialize to an empty body when the diff logic compares + // pointer identity rather than dereferenced contents; Update sends + // the full object and avoids that pitfall. + cur.Status.StatefulSet.UID = "00000000-0000-0000-0000-000000000000" + g.Expect(testCli.Status().Update(testCtx, cur)).To(Succeed()) + + // Trigger a spec-bumped reconcile so the controller picks up the + // forged mismatch. The Owns(StatefulSet) watch would fire on the + // subsequent Delete too, but a spec bump deterministically pokes + // the predicate on the SeiNode itself. + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + specPatch := client.MergeFrom(cur.DeepCopy()) + if cur.Spec.Overrides == nil { + cur.Spec.Overrides = map[string]string{} + } + cur.Spec.Overrides["chain.touch-counter"] = "1" + g.Expect(testCli.Patch(testCtx, cur, specPatch)).To(Succeed()) + + // Converge to terminal state: a new StatefulSet exists with a UID + // different from the original, and Status.StatefulSet tracks it. + // This proves both legs of the impostor branch ran (Delete of the + // realUID object, Apply that produced a fresh UID). + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + if sts.UID == realUID { + return false + } + latest := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, latest); err != nil { + return false + } + return latest.Status.StatefulSet != nil && + latest.Status.StatefulSet.UID == sts.UID + }, "replacement StatefulSet must be created with a new UID and re-tracked on Status") +} + func newTestSeiNode(namespace, name string, paused bool) *seiv1alpha1.SeiNode { return &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{ diff --git a/internal/noderesource/sync.go b/internal/noderesource/sync.go index e4f3de6..6da6bd0 100644 --- a/internal/noderesource/sync.go +++ b/internal/noderesource/sync.go @@ -6,7 +6,6 @@ import ( appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" @@ -15,25 +14,50 @@ import ( seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) +// statefulSetFieldOwner is the SSA fieldManager recorded on every +// Apply issued by SyncStatefulSet. Sharing the same name across the +// controller and the apply-statefulset task keeps managedFields +// entries unified rather than fragmenting ownership. Matches the +// fieldOwner used by sibling tasks (apply_service, +// apply_rbac_proxy_config). +const statefulSetFieldOwner = client.FieldOwner("seinode-controller") + // SyncStatefulSet brings the live StatefulSet for node into line with -// the rendered desired spec using typed Get + Create/Update — no -// server-side apply, no Unstructured. Returns the resulting live -// object so callers can record its identity (Name/UID) on the SeiNode -// status. +// the rendered desired spec using Server-Side Apply on the typed +// `*appsv1.StatefulSet` — defaulting-immune by design, so apiserver- +// applied PodSpec defaults (RestartPolicy, DNSPolicy, etc.) never +// surface as divergence. // // Behaviors: // - Spec.Paused=true forces Replicas=0 in the desired render. +// Pausing during an in-flight rollout effectively fast-forwards +// the rollout: pods terminate, and the next unpause brings up a +// fresh pod from the current (post-update) template. // - When Status.StatefulSet tracks a UID and the live object has a -// different one, the impostor is deleted and recreated (out-of- -// band recreation by an operator). -// - When the live object is absent, it is created. When present and -// mutable fields diverge, it is Updated; otherwise no apiserver -// write is issued. +// different one, the impostor is deleted and this call returns +// (nil, nil) without applying. The next reconcile observes the +// impostor gone and applies a fresh StatefulSet. Splitting the +// delete and the apply across reconciles keeps each call against +// a clean apiserver state — applying onto a still-deleting object +// (DeletionTimestamp set) has undefined SSA semantics if a future +// finalizer ever lands on the resource. +// +// Returns the live StatefulSet on a normal Apply path. Returns +// (nil, nil) when the impostor was deleted this reconcile and callers +// should requeue without further work. Callers update +// Status.StatefulSet from the returned object; a nil return means the +// stored UID stays in place, and the next reconcile will observe +// NotFound (impostor gone) and Apply a fresh STS whose new UID gets +// stamped onto Status.StatefulSet. // -// Status mutation is left to the caller. The controller updates -// Status.StatefulSet on first observation; the apply-statefulset task -// can ignore the return because the controller catches up on the next -// reconcile. +// Deletion uses the default Background propagation. StatefulSet pods +// own no resources we care about preserving here; volumeClaimTemplates +// PVCs are not garbage-collected with the StatefulSet (they carry no +// blocking ownerRef back to the STS, and +// PersistentVolumeClaimRetentionPolicy defaults to WhenDeleted=Retain). +// Foreground propagation would add a finalizer that blocks STS +// removal until pods drain — unnecessary for impostor swap, and fatal +// in envtest where no kube-controller-manager exists to reap pods. func SyncStatefulSet( ctx context.Context, c client.Client, @@ -45,6 +69,7 @@ func SyncStatefulSet( if err != nil { return nil, fmt.Errorf("generating statefulset: %w", err) } + desired.SetGroupVersionKind(appsv1.SchemeGroupVersion.WithKind("StatefulSet")) if err := ctrl.SetControllerReference(node, desired, scheme); err != nil { return nil, fmt.Errorf("setting owner reference: %w", err) } @@ -52,78 +77,39 @@ func SyncStatefulSet( desired.Spec.Replicas = ptr.To(int32(0)) } - existing := &appsv1.StatefulSet{} key := client.ObjectKeyFromObject(desired) - getErr := c.Get(ctx, key, existing) - - if apierrors.IsNotFound(getErr) { - if err := c.Create(ctx, desired); err != nil { - return nil, fmt.Errorf("creating statefulset: %w", err) - } - return desired, nil - } - if getErr != nil { - return nil, fmt.Errorf("fetching statefulset: %w", getErr) - } - - if node.Status.StatefulSet != nil && existing.UID != node.Status.StatefulSet.UID { - if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - return nil, fmt.Errorf("deleting impostor statefulset: %w", err) - } - if err := c.Create(ctx, desired); err != nil { - return nil, fmt.Errorf("recreating statefulset after UID mismatch: %w", err) - } - return desired, nil - } - if mutateStatefulSet(existing, desired) { - if err := c.Update(ctx, existing); err != nil { - return nil, fmt.Errorf("updating statefulset: %w", err) + // Impostor recovery: a UID mismatch between the tracked Status and + // the live object means an operator deleted and recreated the + // StatefulSet out-of-band. Issue Delete and return without applying + // — the next reconcile (triggered by the StatefulSet Owns watch + // firing on the delete event) observes a clean NotFound and Applies + // fresh. The Delete is idempotent; re-entering this branch before + // the cache catches up just no-ops. + if node.Status.StatefulSet != nil { + existing := &appsv1.StatefulSet{} + switch err := c.Get(ctx, key, existing); { + case err == nil: + if existing.UID != node.Status.StatefulSet.UID { + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return nil, fmt.Errorf("deleting impostor statefulset: %w", err) + } + return nil, nil + } + case apierrors.IsNotFound(err): + // Live object missing; the Apply below recreates it. + default: + return nil, fmt.Errorf("fetching tracked statefulset: %w", err) } } - return existing, nil -} - -// mutateStatefulSet copies mutable Spec fields from desired into -// existing and returns whether anything changed. Immutable fields -// (Selector, ServiceName, VolumeClaimTemplates, PodManagementPolicy) -// are intentionally left alone so a deterministic-but-not-byte- -// identical render never trips an apiserver immutability check. -func mutateStatefulSet(existing, desired *appsv1.StatefulSet) bool { - changed := false - if !int32PtrEq(existing.Spec.Replicas, desired.Spec.Replicas) { - existing.Spec.Replicas = desired.Spec.Replicas - changed = true - } - if !apiequality.Semantic.DeepEqual(existing.Spec.Template, desired.Spec.Template) { - existing.Spec.Template = desired.Spec.Template - changed = true - } - if !apiequality.Semantic.DeepEqual(existing.Spec.UpdateStrategy, desired.Spec.UpdateStrategy) { - existing.Spec.UpdateStrategy = desired.Spec.UpdateStrategy - changed = true - } - if existing.Spec.MinReadySeconds != desired.Spec.MinReadySeconds { - existing.Spec.MinReadySeconds = desired.Spec.MinReadySeconds - changed = true - } - if !apiequality.Semantic.DeepEqual(existing.Labels, desired.Labels) { - existing.Labels = desired.Labels - changed = true - } - if !apiequality.Semantic.DeepEqual(existing.Annotations, desired.Annotations) { - existing.Annotations = desired.Annotations - changed = true - } - return changed -} -func int32PtrEq(a, b *int32) bool { - if a == nil && b == nil { - return true - } - if a == nil || b == nil { - return false + // client.Apply decodes the apiserver response into desired in-place, + // so its UID and ResourceVersion reflect the live object after this + // call returns. No re-Get is needed (and a re-Get against the cache + // can race with watch propagation, returning NotFound on the very + // first Apply). + if err := c.Patch(ctx, desired, client.Apply, statefulSetFieldOwner, client.ForceOwnership); err != nil { + return nil, fmt.Errorf("applying statefulset: %w", err) } - return *a == *b + return desired, nil } diff --git a/internal/noderesource/sync_test.go b/internal/noderesource/sync_test.go new file mode 100644 index 0000000..e6a49f7 --- /dev/null +++ b/internal/noderesource/sync_test.go @@ -0,0 +1,228 @@ +package noderesource + +import ( + "context" + "testing" + + . "github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/platform/platformtest" +) + +// newSyncTestScheme builds a scheme with both core and sei.io types. +// SyncStatefulSet writes appsv1.StatefulSet and reads seiv1alpha1.SeiNode, +// so both must be registered for the fake client to encode/decode. +func newSyncTestScheme(t *testing.T) *k8sruntime.Scheme { + t.Helper() + s := k8sruntime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + return s +} + +// TestSyncStatefulSet_FirstCreate exercises the cold-start path: no +// tracked Status, no live StatefulSet. SyncStatefulSet renders the +// desired object and Server-Side-Applies it, returning the live result +// for the caller to record on Status. +func TestSyncStatefulSet_FirstCreate(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("first-create", "default") + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil(), "first-create must return the applied StatefulSet") + g.Expect(sts.Name).To(Equal("first-create")) + g.Expect(sts.Namespace).To(Equal("default")) + + live := &appsv1.StatefulSet{} + g.Expect(c.Get(context.Background(), types.NamespacedName{Name: "first-create", Namespace: "default"}, live)).To(Succeed()) +} + +// TestSyncStatefulSet_ImpostorDeletes covers the UID-mismatch branch. +// Status.StatefulSet tracks UID-A. A live StatefulSet with the same +// name but UID-B (the "impostor") is present. SyncStatefulSet must +// Delete the impostor and return (nil, nil) — leaving the Apply to the +// next reconcile, when Get will observe NotFound and the Apply lands +// as a fresh Create. Splitting Delete and Apply across reconciles +// avoids applying onto a still-deleting object. +func TestSyncStatefulSet_ImpostorDeletes(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("impostor-swap", "default") + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: "impostor-swap", + UID: "uid-A", + } + + impostor := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "impostor-swap", + Namespace: "default", + UID: "uid-B", + }, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node, impostor). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).To(BeNil(), "impostor branch returns nil so caller requeues without applying") + + // The impostor must be gone from the apiserver (fake client honors + // Background-propagation Delete on objects with no finalizers). + live := &appsv1.StatefulSet{} + err = c.Get(context.Background(), types.NamespacedName{Name: "impostor-swap", Namespace: "default"}, live) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "impostor must be deleted; got err=%v", err) +} + +// TestSyncStatefulSet_ImpostorThenApply simulates the two-reconcile +// recovery sequence end-to-end on the fake client. Reconcile 1 deletes +// the impostor and returns nil. Reconcile 2 observes NotFound and +// Applies fresh; the returned StatefulSet has a new identity that the +// caller stamps onto Status. +func TestSyncStatefulSet_ImpostorThenApply(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("impostor-2reconcile", "default") + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: "impostor-2reconcile", + UID: "uid-A", + } + + impostor := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "impostor-2reconcile", + Namespace: "default", + UID: "uid-B", + }, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node, impostor). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + // Reconcile 1: detect mismatch, Delete, return nil. + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).To(BeNil()) + + // Reconcile 2: Get sees NotFound, Apply creates fresh. + sts, err = SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil(), "second reconcile must Apply fresh") + g.Expect(sts.UID).NotTo(Equal(types.UID("uid-A"))) + g.Expect(sts.UID).NotTo(Equal(types.UID("uid-B"))) +} + +// TestSyncStatefulSet_MatchingUID is the steady-state path. A live +// StatefulSet exists whose UID matches Status.StatefulSet. Sync must +// not delete; it Applies (idempotent) and returns the live object. +func TestSyncStatefulSet_MatchingUID(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("steady-state", "default") + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: "steady-state", + UID: "uid-A", + } + + existing := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "steady-state", + Namespace: "default", + UID: "uid-A", + }, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node, existing). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil()) + + // The original object must still be present (UID preserved means + // no delete occurred). + live := &appsv1.StatefulSet{} + g.Expect(c.Get(context.Background(), types.NamespacedName{Name: "steady-state", Namespace: "default"}, live)).To(Succeed()) + g.Expect(live.UID).To(Equal(types.UID("uid-A"))) +} + +// TestSyncStatefulSet_TrackedButMissing covers the "controller restarts +// after operator delete" path. Status tracks a UID but the live STS is +// gone. Sync skips the impostor branch and Applies fresh. +func TestSyncStatefulSet_TrackedButMissing(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("tracked-missing", "default") + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: "tracked-missing", + UID: "uid-A", + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil(), "tracked-but-missing must Apply fresh on the same reconcile") +} + +// TestSyncStatefulSet_PausedReplicasZero asserts the pause behavior: +// Spec.Paused=true forces Replicas=0 in the Applied object regardless +// of whether the node is in the impostor branch path. +func TestSyncStatefulSet_PausedReplicasZero(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("paused-zero", "default") + node.Spec.Paused = true + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil()) + g.Expect(sts.Spec.Replicas).NotTo(BeNil()) + g.Expect(*sts.Spec.Replicas).To(Equal(int32(0))) +} diff --git a/internal/task/apply_statefulset.go b/internal/task/apply_statefulset.go index f547b4f..3283022 100644 --- a/internal/task/apply_statefulset.go +++ b/internal/task/apply_statefulset.go @@ -39,13 +39,27 @@ func deserializeApplyStatefulSet(id string, params json.RawMessage, cfg Executio }, nil } +// Execute writes the SeiNode's StatefulSet via the shared SSA helper. +// The SeiNode controller continuously calls the same helper from its +// own reconcile loop; both paths Apply the same rendered content under +// the same fieldManager, so they are idempotent against each other. +// The task remains in the plan as a sync gate for replace-pod. +// +// A nil StatefulSet with no error means SyncStatefulSet detected a +// UID-mismatch impostor and deleted it without applying. Leave the +// task in Running so the executor re-invokes on the next reconcile, +// at which point the impostor is gone and Apply creates fresh. func (e *applyStatefulSetExecution) Execute(ctx context.Context) error { node, err := ResourceAs[*seiv1alpha1.SeiNode](e.cfg) if err != nil { return err } - if _, err := noderesource.SyncStatefulSet(ctx, e.cfg.KubeClient, e.cfg.Scheme, node, e.cfg.Platform); err != nil { - return fmt.Errorf("syncing statefulset: %w", err) + sts, err := noderesource.SyncStatefulSet(ctx, e.cfg.KubeClient, e.cfg.Scheme, node, e.cfg.Platform) + if err != nil { + return fmt.Errorf("applying statefulset: %w", err) + } + if sts == nil { + return nil } e.complete() return nil diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 08c5ef4..5052647 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -349,9 +349,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 07d4696..5e917ab 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -20,6 +20,10 @@ spec: - jsonPath: .status.phase name: Phase type: string + - jsonPath: .status.statefulSet.name + name: StatefulSet + priority: 1 + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -218,9 +222,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- From ae5aecc90111fc1e3b76a1724e344a28c540ca6f Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 21 May 2026 15:08:10 -0700 Subject: [PATCH 3/4] chore: address lint + regen deepcopy for new StatefulSetRef MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI lint failures from PR #345: - goconst on sync_test.go: extract testNamespace, impostorName, steadyStateName, originalTestUID, impostorTestUID. - modernize: ptr.To(int32(0)) -> new(int32). Drops the unused k8s.io/utils/ptr import. - staticcheck SA1019: client.Apply patch type is deprecated. Re-add the same nolint the original task carried — migrating to typed ApplyConfiguration builders is a substantial separate refactor. verify-generated failure: - New StatefulSetRef type needs DeepCopy generation. Ran `make generate`; updates api/v1alpha1/zz_generated.deepcopy.go. Co-Authored-By: Claude Opus 4.7 --- api/v1alpha1/zz_generated.deepcopy.go | 20 +++++++++ internal/noderesource/sync.go | 4 +- internal/noderesource/sync_test.go | 62 +++++++++++++++------------ 3 files changed, 57 insertions(+), 29 deletions(-) diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 3eee8c1..af6ad57 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1038,6 +1038,11 @@ func (in *SeiNodeStatus) DeepCopyInto(out *SeiNodeStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.StatefulSet != nil { + in, out := &in.StatefulSet, &out.StatefulSet + *out = new(StatefulSetRef) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeiNodeStatus. @@ -1454,6 +1459,21 @@ func (in *StateSyncSource) DeepCopy() *StateSyncSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StatefulSetRef) DeepCopyInto(out *StatefulSetRef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StatefulSetRef. +func (in *StatefulSetRef) DeepCopy() *StatefulSetRef { + if in == nil { + return nil + } + out := new(StatefulSetRef) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *StaticPeerSource) DeepCopyInto(out *StaticPeerSource) { *out = *in diff --git a/internal/noderesource/sync.go b/internal/noderesource/sync.go index 6da6bd0..f0a75bb 100644 --- a/internal/noderesource/sync.go +++ b/internal/noderesource/sync.go @@ -7,7 +7,6 @@ import ( appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -74,7 +73,7 @@ func SyncStatefulSet( return nil, fmt.Errorf("setting owner reference: %w", err) } if node.Spec.Paused { - desired.Spec.Replicas = ptr.To(int32(0)) + desired.Spec.Replicas = new(int32) } key := client.ObjectKeyFromObject(desired) @@ -108,6 +107,7 @@ func SyncStatefulSet( // call returns. No re-Get is needed (and a re-Get against the cache // can race with watch propagation, returning NotFound on the very // first Apply). + //nolint:staticcheck // migrating to typed ApplyConfiguration builders is a separate effort if err := c.Patch(ctx, desired, client.Apply, statefulSetFieldOwner, client.ForceOwnership); err != nil { return nil, fmt.Errorf("applying statefulset: %w", err) } diff --git a/internal/noderesource/sync_test.go b/internal/noderesource/sync_test.go index e6a49f7..18e464b 100644 --- a/internal/noderesource/sync_test.go +++ b/internal/noderesource/sync_test.go @@ -18,6 +18,14 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/platform/platformtest" ) +const ( + testNamespace = "default" + impostorName = "impostor-swap" + steadyStateName = "steady-state" + originalTestUID = "uid-A" + impostorTestUID = "uid-B" +) + // newSyncTestScheme builds a scheme with both core and sei.io types. // SyncStatefulSet writes appsv1.StatefulSet and reads seiv1alpha1.SeiNode, // so both must be registered for the fake client to encode/decode. @@ -41,7 +49,7 @@ func TestSyncStatefulSet_FirstCreate(t *testing.T) { g := NewWithT(t) s := newSyncTestScheme(t) - node := newGenesisNode("first-create", "default") + node := newGenesisNode("first-create", testNamespace) c := fake.NewClientBuilder(). WithScheme(s). WithObjects(node). @@ -52,10 +60,10 @@ func TestSyncStatefulSet_FirstCreate(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(sts).NotTo(BeNil(), "first-create must return the applied StatefulSet") g.Expect(sts.Name).To(Equal("first-create")) - g.Expect(sts.Namespace).To(Equal("default")) + g.Expect(sts.Namespace).To(Equal(testNamespace)) live := &appsv1.StatefulSet{} - g.Expect(c.Get(context.Background(), types.NamespacedName{Name: "first-create", Namespace: "default"}, live)).To(Succeed()) + g.Expect(c.Get(context.Background(), types.NamespacedName{Name: "first-create", Namespace: testNamespace}, live)).To(Succeed()) } // TestSyncStatefulSet_ImpostorDeletes covers the UID-mismatch branch. @@ -69,17 +77,17 @@ func TestSyncStatefulSet_ImpostorDeletes(t *testing.T) { g := NewWithT(t) s := newSyncTestScheme(t) - node := newGenesisNode("impostor-swap", "default") + node := newGenesisNode(impostorName, testNamespace) node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ - Name: "impostor-swap", - UID: "uid-A", + Name: impostorName, + UID: originalTestUID, } impostor := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ - Name: "impostor-swap", - Namespace: "default", - UID: "uid-B", + Name: impostorName, + Namespace: testNamespace, + UID: impostorTestUID, }, } @@ -96,7 +104,7 @@ func TestSyncStatefulSet_ImpostorDeletes(t *testing.T) { // The impostor must be gone from the apiserver (fake client honors // Background-propagation Delete on objects with no finalizers). live := &appsv1.StatefulSet{} - err = c.Get(context.Background(), types.NamespacedName{Name: "impostor-swap", Namespace: "default"}, live) + err = c.Get(context.Background(), types.NamespacedName{Name: impostorName, Namespace: testNamespace}, live) g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "impostor must be deleted; got err=%v", err) } @@ -109,17 +117,17 @@ func TestSyncStatefulSet_ImpostorThenApply(t *testing.T) { g := NewWithT(t) s := newSyncTestScheme(t) - node := newGenesisNode("impostor-2reconcile", "default") + node := newGenesisNode("impostor-2reconcile", testNamespace) node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ Name: "impostor-2reconcile", - UID: "uid-A", + UID: originalTestUID, } impostor := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: "impostor-2reconcile", - Namespace: "default", - UID: "uid-B", + Namespace: testNamespace, + UID: impostorTestUID, }, } @@ -138,8 +146,8 @@ func TestSyncStatefulSet_ImpostorThenApply(t *testing.T) { sts, err = SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) g.Expect(err).NotTo(HaveOccurred()) g.Expect(sts).NotTo(BeNil(), "second reconcile must Apply fresh") - g.Expect(sts.UID).NotTo(Equal(types.UID("uid-A"))) - g.Expect(sts.UID).NotTo(Equal(types.UID("uid-B"))) + g.Expect(sts.UID).NotTo(Equal(types.UID(originalTestUID))) + g.Expect(sts.UID).NotTo(Equal(types.UID(impostorTestUID))) } // TestSyncStatefulSet_MatchingUID is the steady-state path. A live @@ -149,17 +157,17 @@ func TestSyncStatefulSet_MatchingUID(t *testing.T) { g := NewWithT(t) s := newSyncTestScheme(t) - node := newGenesisNode("steady-state", "default") + node := newGenesisNode(steadyStateName, testNamespace) node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ - Name: "steady-state", - UID: "uid-A", + Name: steadyStateName, + UID: originalTestUID, } existing := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ - Name: "steady-state", - Namespace: "default", - UID: "uid-A", + Name: steadyStateName, + Namespace: testNamespace, + UID: originalTestUID, }, } @@ -176,8 +184,8 @@ func TestSyncStatefulSet_MatchingUID(t *testing.T) { // The original object must still be present (UID preserved means // no delete occurred). live := &appsv1.StatefulSet{} - g.Expect(c.Get(context.Background(), types.NamespacedName{Name: "steady-state", Namespace: "default"}, live)).To(Succeed()) - g.Expect(live.UID).To(Equal(types.UID("uid-A"))) + g.Expect(c.Get(context.Background(), types.NamespacedName{Name: steadyStateName, Namespace: testNamespace}, live)).To(Succeed()) + g.Expect(live.UID).To(Equal(types.UID(originalTestUID))) } // TestSyncStatefulSet_TrackedButMissing covers the "controller restarts @@ -187,10 +195,10 @@ func TestSyncStatefulSet_TrackedButMissing(t *testing.T) { g := NewWithT(t) s := newSyncTestScheme(t) - node := newGenesisNode("tracked-missing", "default") + node := newGenesisNode("tracked-missing", testNamespace) node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ Name: "tracked-missing", - UID: "uid-A", + UID: originalTestUID, } c := fake.NewClientBuilder(). @@ -211,7 +219,7 @@ func TestSyncStatefulSet_PausedReplicasZero(t *testing.T) { g := NewWithT(t) s := newSyncTestScheme(t) - node := newGenesisNode("paused-zero", "default") + node := newGenesisNode("paused-zero", testNamespace) node.Spec.Paused = true c := fake.NewClientBuilder(). From dd2b4baebc6889e7a707f60f1b44ac2f74deab58 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 21 May 2026 15:14:32 -0700 Subject: [PATCH 4/4] chore: gofmt sync_test.go constant block Column alignment in the const () block was off by one space. Co-Authored-By: Claude Opus 4.7 --- internal/noderesource/sync_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/noderesource/sync_test.go b/internal/noderesource/sync_test.go index 18e464b..d13b643 100644 --- a/internal/noderesource/sync_test.go +++ b/internal/noderesource/sync_test.go @@ -19,11 +19,11 @@ import ( ) const ( - testNamespace = "default" - impostorName = "impostor-swap" - steadyStateName = "steady-state" - originalTestUID = "uid-A" - impostorTestUID = "uid-B" + testNamespace = "default" + impostorName = "impostor-swap" + steadyStateName = "steady-state" + originalTestUID = "uid-A" + impostorTestUID = "uid-B" ) // newSyncTestScheme builds a scheme with both core and sei.io types.