diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 33bbf31..a13b2d1 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -3,6 +3,7 @@ package v1alpha1 import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ) // SeiNodeSpec defines the desired state of a standalone Sei node. @@ -68,9 +69,12 @@ type SeiNodeSpec struct { Validator *ValidatorSpec `json:"validator,omitempty"` // Paused freezes reconciliation. While true, the controller does not - // advance the lifecycle, start plans, or mutate derived resources. - // In-flight tasks on the cluster run to completion but their results - // are not polled until the field is cleared. + // advance the lifecycle, start plans, or mutate derived resources + // except the owned StatefulSet — which scales to Replicas=0 so pods + // terminate. In-flight tasks on the cluster run to completion but + // their results are not polled until the field is cleared. + // Has no effect on nodes in PhaseFailed; delete and recreate to + // recover from a failed node. // +optional Paused bool `json:"paused,omitempty"` } @@ -343,12 +347,33 @@ type SeiNodeStatus struct { // config so the node advertises a reachable address for gossip discovery. // +optional ExternalAddress string `json:"externalAddress,omitempty"` + + // StatefulSet references the StatefulSet the controller created for + // this SeiNode. UID is the identity check: an STS with the expected + // name but a different UID is not the one this controller created + // (e.g., manual recreation out-of-band) and triggers replacement. + // +optional + StatefulSet *StatefulSetRef `json:"statefulSet,omitempty"` +} + +// StatefulSetRef identifies a StatefulSet owned and managed by a +// SeiNode. Stored on Status so the controller can fetch and mutate the +// owned object directly rather than blindly server-side-applying. +type StatefulSetRef struct { + // Name of the StatefulSet (always equals the SeiNode name). + Name string `json:"name"` + + // UID of the StatefulSet. Used to detect out-of-band recreation: + // if a new StatefulSet appears with the same name but a different + // UID, the controller knows it is not the one it created. + UID types.UID `json:"uid"` } // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:shortName=snode // +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` +// +kubebuilder:printcolumn:name="StatefulSet",type=string,JSONPath=`.status.statefulSet.name`,priority=1 // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` // SeiNode is the Schema for the seinodes API. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 3eee8c1..af6ad57 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1038,6 +1038,11 @@ func (in *SeiNodeStatus) DeepCopyInto(out *SeiNodeStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.StatefulSet != nil { + in, out := &in.StatefulSet, &out.StatefulSet + *out = new(StatefulSetRef) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeiNodeStatus. @@ -1454,6 +1459,21 @@ func (in *StateSyncSource) DeepCopy() *StateSyncSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StatefulSetRef) DeepCopyInto(out *StatefulSetRef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StatefulSetRef. +func (in *StatefulSetRef) DeepCopy() *StatefulSetRef { + if in == nil { + return nil + } + out := new(StatefulSetRef) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *StaticPeerSource) DeepCopyInto(out *StaticPeerSource) { *out = *in diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 08c5ef4..5052647 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -349,9 +349,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index b887725..5e917ab 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -20,6 +20,10 @@ spec: - jsonPath: .status.phase name: Phase type: string + - jsonPath: .status.statefulSet.name + name: StatefulSet + priority: 1 + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -218,9 +222,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- @@ -950,6 +957,27 @@ spec: items: type: string type: array + statefulSet: + description: |- + StatefulSet references the StatefulSet the controller created for + this SeiNode. UID is the identity check: an STS with the expected + name but a different UID is not the one this controller created + (e.g., manual recreation out-of-band) and triggers replacement. + properties: + name: + description: Name of the StatefulSet (always equals the SeiNode + name). + type: string + uid: + description: |- + UID of the StatefulSet. Used to detect out-of-band recreation: + if a new StatefulSet appears with the same name but a different + UID, the controller knows it is not the one it created. + type: string + required: + - name + - uid + type: object type: object type: object served: true diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 77683d6..3d98b57 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -112,6 +112,10 @@ func (r *SeiNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } + if err := r.reconcileStatefulSet(ctx, node); err != nil { + return ctrl.Result{}, fmt.Errorf("reconciling statefulset: %w", err) + } + if node.Spec.Paused { if err := flushStatus(); err != nil { return ctrl.Result{}, fmt.Errorf("flushing paused status: %w", err) diff --git a/internal/controller/node/statefulset.go b/internal/controller/node/statefulset.go new file mode 100644 index 0000000..81073bd --- /dev/null +++ b/internal/controller/node/statefulset.go @@ -0,0 +1,40 @@ +package node + +import ( + "context" + "fmt" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" +) + +// reconcileStatefulSet syncs the owned StatefulSet via the typed +// Get+Create/Update helper and records the resulting object on +// Status.StatefulSet so subsequent reconciles fetch the tracked +// identity directly. +// +// A nil StatefulSet with no error means the impostor branch fired: +// SyncStatefulSet detected a UID mismatch, issued Delete, and deferred +// the Apply to the next reconcile. Leave Status.StatefulSet untouched — +// the stale UID still points at a now-deleted object, and the next +// reconcile (triggered by the StatefulSet delete watch event) observes +// NotFound and Applies a fresh STS whose new UID gets stamped onto +// Status.StatefulSet. +func (r *SeiNodeReconciler) reconcileStatefulSet(ctx context.Context, node *seiv1alpha1.SeiNode) error { + sts, err := noderesource.SyncStatefulSet(ctx, r.Client, r.Scheme, node, r.Platform) + if err != nil { + return fmt.Errorf("syncing statefulset: %w", err) + } + if sts == nil { + return nil + } + if node.Status.StatefulSet == nil || + node.Status.StatefulSet.UID != sts.UID || + node.Status.StatefulSet.Name != sts.Name { + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: sts.Name, + UID: sts.UID, + } + } + return nil +} diff --git a/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go b/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go new file mode 100644 index 0000000..b20a194 --- /dev/null +++ b/internal/controller/nodedeployment/envtest/seinode_statefulset_test.go @@ -0,0 +1,250 @@ +//go:build envtest + +package envtest_test + +import ( + "testing" + + . "github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/controller/nodedeployment/envtest/fixtures" +) + +// TestSeiNode_StatefulSetTrackedOnStatus asserts the controller creates +// the owned StatefulSet on first reconcile and records {Name, UID} on +// Status. Subsequent reconciles fetch and mutate the tracked object +// rather than re-applying blindly. +func TestSeiNode_StatefulSetTrackedOnStatus(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "sts-tracked", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + + waitFor(t, func() bool { + cur := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, cur); err != nil { + return false + } + return cur.Status.StatefulSet != nil && cur.Status.StatefulSet.UID != "" + }, "Status.StatefulSet must be recorded after first reconcile") + + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + ref := cur.Status.StatefulSet + g.Expect(ref.Name).To(Equal(node.Name)) + + sts := &appsv1.StatefulSet{} + g.Expect(testCli.Get(testCtx, types.NamespacedName{Name: ref.Name, Namespace: ns}, sts)).To(Succeed()) + g.Expect(sts.UID).To(Equal(ref.UID), "tracked UID must match the live StatefulSet") +} + +// TestSeiNode_Paused_ScalesStatefulSetToZero exercises the hard-pause +// behavior: setting Spec.Paused=true scales the owned StatefulSet to +// zero replicas; clearing it restores the desired replica count. +func TestSeiNode_Paused_ScalesStatefulSetToZero(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "paused-scaled", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + + // Initial reconcile creates the StatefulSet at replicas=1. + stsKey := types.NamespacedName{Name: node.Name, Namespace: ns} + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + return sts.Spec.Replicas != nil && *sts.Spec.Replicas == 1 + }, "StatefulSet must be created with replicas=1 for an unpaused SeiNode") + + // Pause. Controller scales to 0. + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + patch := client.MergeFrom(cur.DeepCopy()) + cur.Spec.Paused = true + g.Expect(testCli.Patch(testCtx, cur, patch)).To(Succeed()) + + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + return sts.Spec.Replicas != nil && *sts.Spec.Replicas == 0 + }, "StatefulSet must scale to replicas=0 after pause") + + // Unpause. Controller restores replicas. + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + patch = client.MergeFrom(cur.DeepCopy()) + cur.Spec.Paused = false + g.Expect(testCli.Patch(testCtx, cur, patch)).To(Succeed()) + + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + return sts.Spec.Replicas != nil && *sts.Spec.Replicas == 1 + }, "StatefulSet must scale back to replicas=1 after unpause") +} + +// TestSeiNode_StatefulSetRecreatedAfterDelete covers the +// out-of-band-deletion case: if an operator deletes the StatefulSet, +// the next reconcile observes the NotFound, recreates the object, and +// re-records the new UID on Status.StatefulSet. +func TestSeiNode_StatefulSetRecreatedAfterDelete(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "sts-recreated", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + + waitFor(t, func() bool { + cur := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, cur); err != nil { + return false + } + return cur.Status.StatefulSet != nil + }, "Status.StatefulSet must be recorded after first reconcile") + + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + originalUID := cur.Status.StatefulSet.UID + + sts := &appsv1.StatefulSet{} + stsKey := types.NamespacedName{Name: node.Name, Namespace: ns} + g.Expect(testCli.Get(testCtx, stsKey, sts)).To(Succeed()) + g.Expect(testCli.Delete(testCtx, sts)).To(Succeed()) + + // Bump the spec so the controller reconciles again (the SeiNode + // reconciler watches StatefulSet via Owns, but envtest sometimes + // races; a Spec edit triggers the predicate deterministically). + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + patch := client.MergeFrom(cur.DeepCopy()) + if cur.Spec.Overrides == nil { + cur.Spec.Overrides = map[string]string{} + } + cur.Spec.Overrides["chain.touch-counter"] = "1" + g.Expect(testCli.Patch(testCtx, cur, patch)).To(Succeed()) + + waitFor(t, func() bool { + s := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, s); err != nil { + return apierrors.IsNotFound(err) == false + } + // Confirm the Status ref reflects the new UID. + latest := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, latest); err != nil { + return false + } + return latest.Status.StatefulSet != nil && + latest.Status.StatefulSet.UID == s.UID && + s.UID != originalUID + }, "deleted StatefulSet must be recreated with a new UID and re-tracked on Status") +} + +// TestSeiNode_StatefulSetImpostorReplaced covers the UID-mismatch +// recovery: when Status.StatefulSet points at a UID that doesn't match +// the live object, the controller deletes the impostor on one reconcile +// and Applies a fresh StatefulSet on the next. The split keeps each +// reconcile against a clean apiserver state — applying onto a +// still-deleting object has undefined SSA semantics if a finalizer +// ever blocks removal. +// +// envtest has no kube-controller-manager. That's fine for this case: +// StatefulSets carry no finalizers by default, so the Background- +// propagation Delete the controller issues commits to etcd +// immediately. No tombstone state to drain; the Owns watch fires the +// next reconcile, which sees NotFound and creates fresh. +// +// The test asserts on the terminal state — STS recreated with a new +// UID and Status.StatefulSet re-tracked — rather than on the transient +// "delete observed" state, which is unobservably short under +// Background propagation with no finalizers. +func TestSeiNode_StatefulSetImpostorReplaced(t *testing.T) { + g := NewWithT(t) + ns := makeNamespace(t) + + node := newTestSeiNode(ns, "sts-impostor", false) + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + key := client.ObjectKeyFromObject(node) + stsKey := types.NamespacedName{Name: node.Name, Namespace: ns} + + waitFor(t, func() bool { + cur := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, cur); err != nil { + return false + } + return cur.Status.StatefulSet != nil && cur.Status.StatefulSet.UID != "" + }, "Status.StatefulSet recorded after first reconcile") + + cur := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + realUID := cur.Status.StatefulSet.UID + + // Forge a UID mismatch with Status().Update — a MergeFrom patch + // on a pointer-typed sub-struct (Status.StatefulSet is *StatefulSetRef) + // can serialize to an empty body when the diff logic compares + // pointer identity rather than dereferenced contents; Update sends + // the full object and avoids that pitfall. + cur.Status.StatefulSet.UID = "00000000-0000-0000-0000-000000000000" + g.Expect(testCli.Status().Update(testCtx, cur)).To(Succeed()) + + // Trigger a spec-bumped reconcile so the controller picks up the + // forged mismatch. The Owns(StatefulSet) watch would fire on the + // subsequent Delete too, but a spec bump deterministically pokes + // the predicate on the SeiNode itself. + g.Expect(testCli.Get(testCtx, key, cur)).To(Succeed()) + specPatch := client.MergeFrom(cur.DeepCopy()) + if cur.Spec.Overrides == nil { + cur.Spec.Overrides = map[string]string{} + } + cur.Spec.Overrides["chain.touch-counter"] = "1" + g.Expect(testCli.Patch(testCtx, cur, specPatch)).To(Succeed()) + + // Converge to terminal state: a new StatefulSet exists with a UID + // different from the original, and Status.StatefulSet tracks it. + // This proves both legs of the impostor branch ran (Delete of the + // realUID object, Apply that produced a fresh UID). + waitFor(t, func() bool { + sts := &appsv1.StatefulSet{} + if err := testCli.Get(testCtx, stsKey, sts); err != nil { + return false + } + if sts.UID == realUID { + return false + } + latest := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, key, latest); err != nil { + return false + } + return latest.Status.StatefulSet != nil && + latest.Status.StatefulSet.UID == sts.UID + }, "replacement StatefulSet must be created with a new UID and re-tracked on Status") +} + +func newTestSeiNode(namespace, name string, paused bool) *seiv1alpha1.SeiNode { + return &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "pacific-1", + Image: fixtures.DefaultImage, + FullNode: &seiv1alpha1.FullNodeSpec{}, + Paused: paused, + }, + } +} diff --git a/internal/noderesource/sync.go b/internal/noderesource/sync.go new file mode 100644 index 0000000..f0a75bb --- /dev/null +++ b/internal/noderesource/sync.go @@ -0,0 +1,115 @@ +package noderesource + +import ( + "context" + "fmt" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" +) + +// statefulSetFieldOwner is the SSA fieldManager recorded on every +// Apply issued by SyncStatefulSet. Sharing the same name across the +// controller and the apply-statefulset task keeps managedFields +// entries unified rather than fragmenting ownership. Matches the +// fieldOwner used by sibling tasks (apply_service, +// apply_rbac_proxy_config). +const statefulSetFieldOwner = client.FieldOwner("seinode-controller") + +// SyncStatefulSet brings the live StatefulSet for node into line with +// the rendered desired spec using Server-Side Apply on the typed +// `*appsv1.StatefulSet` — defaulting-immune by design, so apiserver- +// applied PodSpec defaults (RestartPolicy, DNSPolicy, etc.) never +// surface as divergence. +// +// Behaviors: +// - Spec.Paused=true forces Replicas=0 in the desired render. +// Pausing during an in-flight rollout effectively fast-forwards +// the rollout: pods terminate, and the next unpause brings up a +// fresh pod from the current (post-update) template. +// - When Status.StatefulSet tracks a UID and the live object has a +// different one, the impostor is deleted and this call returns +// (nil, nil) without applying. The next reconcile observes the +// impostor gone and applies a fresh StatefulSet. Splitting the +// delete and the apply across reconciles keeps each call against +// a clean apiserver state — applying onto a still-deleting object +// (DeletionTimestamp set) has undefined SSA semantics if a future +// finalizer ever lands on the resource. +// +// Returns the live StatefulSet on a normal Apply path. Returns +// (nil, nil) when the impostor was deleted this reconcile and callers +// should requeue without further work. Callers update +// Status.StatefulSet from the returned object; a nil return means the +// stored UID stays in place, and the next reconcile will observe +// NotFound (impostor gone) and Apply a fresh STS whose new UID gets +// stamped onto Status.StatefulSet. +// +// Deletion uses the default Background propagation. StatefulSet pods +// own no resources we care about preserving here; volumeClaimTemplates +// PVCs are not garbage-collected with the StatefulSet (they carry no +// blocking ownerRef back to the STS, and +// PersistentVolumeClaimRetentionPolicy defaults to WhenDeleted=Retain). +// Foreground propagation would add a finalizer that blocks STS +// removal until pods drain — unnecessary for impostor swap, and fatal +// in envtest where no kube-controller-manager exists to reap pods. +func SyncStatefulSet( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + node *seiv1alpha1.SeiNode, + platform PlatformConfig, +) (*appsv1.StatefulSet, error) { + desired, err := GenerateStatefulSet(node, platform) + if err != nil { + return nil, fmt.Errorf("generating statefulset: %w", err) + } + desired.SetGroupVersionKind(appsv1.SchemeGroupVersion.WithKind("StatefulSet")) + if err := ctrl.SetControllerReference(node, desired, scheme); err != nil { + return nil, fmt.Errorf("setting owner reference: %w", err) + } + if node.Spec.Paused { + desired.Spec.Replicas = new(int32) + } + + key := client.ObjectKeyFromObject(desired) + + // Impostor recovery: a UID mismatch between the tracked Status and + // the live object means an operator deleted and recreated the + // StatefulSet out-of-band. Issue Delete and return without applying + // — the next reconcile (triggered by the StatefulSet Owns watch + // firing on the delete event) observes a clean NotFound and Applies + // fresh. The Delete is idempotent; re-entering this branch before + // the cache catches up just no-ops. + if node.Status.StatefulSet != nil { + existing := &appsv1.StatefulSet{} + switch err := c.Get(ctx, key, existing); { + case err == nil: + if existing.UID != node.Status.StatefulSet.UID { + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return nil, fmt.Errorf("deleting impostor statefulset: %w", err) + } + return nil, nil + } + case apierrors.IsNotFound(err): + // Live object missing; the Apply below recreates it. + default: + return nil, fmt.Errorf("fetching tracked statefulset: %w", err) + } + } + + // client.Apply decodes the apiserver response into desired in-place, + // so its UID and ResourceVersion reflect the live object after this + // call returns. No re-Get is needed (and a re-Get against the cache + // can race with watch propagation, returning NotFound on the very + // first Apply). + //nolint:staticcheck // migrating to typed ApplyConfiguration builders is a separate effort + if err := c.Patch(ctx, desired, client.Apply, statefulSetFieldOwner, client.ForceOwnership); err != nil { + return nil, fmt.Errorf("applying statefulset: %w", err) + } + return desired, nil +} diff --git a/internal/noderesource/sync_test.go b/internal/noderesource/sync_test.go new file mode 100644 index 0000000..d13b643 --- /dev/null +++ b/internal/noderesource/sync_test.go @@ -0,0 +1,236 @@ +package noderesource + +import ( + "context" + "testing" + + . "github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/platform/platformtest" +) + +const ( + testNamespace = "default" + impostorName = "impostor-swap" + steadyStateName = "steady-state" + originalTestUID = "uid-A" + impostorTestUID = "uid-B" +) + +// newSyncTestScheme builds a scheme with both core and sei.io types. +// SyncStatefulSet writes appsv1.StatefulSet and reads seiv1alpha1.SeiNode, +// so both must be registered for the fake client to encode/decode. +func newSyncTestScheme(t *testing.T) *k8sruntime.Scheme { + t.Helper() + s := k8sruntime.NewScheme() + if err := clientgoscheme.AddToScheme(s); err != nil { + t.Fatal(err) + } + if err := seiv1alpha1.AddToScheme(s); err != nil { + t.Fatal(err) + } + return s +} + +// TestSyncStatefulSet_FirstCreate exercises the cold-start path: no +// tracked Status, no live StatefulSet. SyncStatefulSet renders the +// desired object and Server-Side-Applies it, returning the live result +// for the caller to record on Status. +func TestSyncStatefulSet_FirstCreate(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("first-create", testNamespace) + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil(), "first-create must return the applied StatefulSet") + g.Expect(sts.Name).To(Equal("first-create")) + g.Expect(sts.Namespace).To(Equal(testNamespace)) + + live := &appsv1.StatefulSet{} + g.Expect(c.Get(context.Background(), types.NamespacedName{Name: "first-create", Namespace: testNamespace}, live)).To(Succeed()) +} + +// TestSyncStatefulSet_ImpostorDeletes covers the UID-mismatch branch. +// Status.StatefulSet tracks UID-A. A live StatefulSet with the same +// name but UID-B (the "impostor") is present. SyncStatefulSet must +// Delete the impostor and return (nil, nil) — leaving the Apply to the +// next reconcile, when Get will observe NotFound and the Apply lands +// as a fresh Create. Splitting Delete and Apply across reconciles +// avoids applying onto a still-deleting object. +func TestSyncStatefulSet_ImpostorDeletes(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode(impostorName, testNamespace) + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: impostorName, + UID: originalTestUID, + } + + impostor := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: impostorName, + Namespace: testNamespace, + UID: impostorTestUID, + }, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node, impostor). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).To(BeNil(), "impostor branch returns nil so caller requeues without applying") + + // The impostor must be gone from the apiserver (fake client honors + // Background-propagation Delete on objects with no finalizers). + live := &appsv1.StatefulSet{} + err = c.Get(context.Background(), types.NamespacedName{Name: impostorName, Namespace: testNamespace}, live) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "impostor must be deleted; got err=%v", err) +} + +// TestSyncStatefulSet_ImpostorThenApply simulates the two-reconcile +// recovery sequence end-to-end on the fake client. Reconcile 1 deletes +// the impostor and returns nil. Reconcile 2 observes NotFound and +// Applies fresh; the returned StatefulSet has a new identity that the +// caller stamps onto Status. +func TestSyncStatefulSet_ImpostorThenApply(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("impostor-2reconcile", testNamespace) + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: "impostor-2reconcile", + UID: originalTestUID, + } + + impostor := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "impostor-2reconcile", + Namespace: testNamespace, + UID: impostorTestUID, + }, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node, impostor). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + // Reconcile 1: detect mismatch, Delete, return nil. + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).To(BeNil()) + + // Reconcile 2: Get sees NotFound, Apply creates fresh. + sts, err = SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil(), "second reconcile must Apply fresh") + g.Expect(sts.UID).NotTo(Equal(types.UID(originalTestUID))) + g.Expect(sts.UID).NotTo(Equal(types.UID(impostorTestUID))) +} + +// TestSyncStatefulSet_MatchingUID is the steady-state path. A live +// StatefulSet exists whose UID matches Status.StatefulSet. Sync must +// not delete; it Applies (idempotent) and returns the live object. +func TestSyncStatefulSet_MatchingUID(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode(steadyStateName, testNamespace) + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: steadyStateName, + UID: originalTestUID, + } + + existing := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: steadyStateName, + Namespace: testNamespace, + UID: originalTestUID, + }, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node, existing). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil()) + + // The original object must still be present (UID preserved means + // no delete occurred). + live := &appsv1.StatefulSet{} + g.Expect(c.Get(context.Background(), types.NamespacedName{Name: steadyStateName, Namespace: testNamespace}, live)).To(Succeed()) + g.Expect(live.UID).To(Equal(types.UID(originalTestUID))) +} + +// TestSyncStatefulSet_TrackedButMissing covers the "controller restarts +// after operator delete" path. Status tracks a UID but the live STS is +// gone. Sync skips the impostor branch and Applies fresh. +func TestSyncStatefulSet_TrackedButMissing(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("tracked-missing", testNamespace) + node.Status.StatefulSet = &seiv1alpha1.StatefulSetRef{ + Name: "tracked-missing", + UID: originalTestUID, + } + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil(), "tracked-but-missing must Apply fresh on the same reconcile") +} + +// TestSyncStatefulSet_PausedReplicasZero asserts the pause behavior: +// Spec.Paused=true forces Replicas=0 in the Applied object regardless +// of whether the node is in the impostor branch path. +func TestSyncStatefulSet_PausedReplicasZero(t *testing.T) { + g := NewWithT(t) + s := newSyncTestScheme(t) + + node := newGenesisNode("paused-zero", testNamespace) + node.Spec.Paused = true + + c := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(node). + WithStatusSubresource(&seiv1alpha1.SeiNode{}). + Build() + + sts, err := SyncStatefulSet(context.Background(), c, s, node, platformtest.Config()) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(sts).NotTo(BeNil()) + g.Expect(sts.Spec.Replicas).NotTo(BeNil()) + g.Expect(*sts.Spec.Replicas).To(Equal(int32(0))) +} diff --git a/internal/task/apply_statefulset.go b/internal/task/apply_statefulset.go index 4b8c745..3283022 100644 --- a/internal/task/apply_statefulset.go +++ b/internal/task/apply_statefulset.go @@ -5,10 +5,6 @@ import ( "encoding/json" "fmt" - appsv1 "k8s.io/api/apps/v1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" ) @@ -43,26 +39,28 @@ func deserializeApplyStatefulSet(id string, params json.RawMessage, cfg Executio }, nil } +// Execute writes the SeiNode's StatefulSet via the shared SSA helper. +// The SeiNode controller continuously calls the same helper from its +// own reconcile loop; both paths Apply the same rendered content under +// the same fieldManager, so they are idempotent against each other. +// The task remains in the plan as a sync gate for replace-pod. +// +// A nil StatefulSet with no error means SyncStatefulSet detected a +// UID-mismatch impostor and deleted it without applying. Leave the +// task in Running so the executor re-invokes on the next reconcile, +// at which point the impostor is gone and Apply creates fresh. func (e *applyStatefulSetExecution) Execute(ctx context.Context) error { node, err := ResourceAs[*seiv1alpha1.SeiNode](e.cfg) if err != nil { return err } - - desired, err := noderesource.GenerateStatefulSet(node, e.cfg.Platform) + sts, err := noderesource.SyncStatefulSet(ctx, e.cfg.KubeClient, e.cfg.Scheme, node, e.cfg.Platform) if err != nil { - return Terminal(fmt.Errorf("generating statefulset: %w", err)) - } - desired.SetGroupVersionKind(appsv1.SchemeGroupVersion.WithKind("StatefulSet")) - if err := ctrl.SetControllerReference(node, desired, e.cfg.Scheme); err != nil { - return fmt.Errorf("setting owner reference on statefulset: %w", err) - } - - //nolint:staticcheck // migrating to typed ApplyConfiguration is a separate effort - if err := e.cfg.KubeClient.Patch(ctx, desired, client.Apply, fieldOwner, client.ForceOwnership); err != nil { return fmt.Errorf("applying statefulset: %w", err) } - + if sts == nil { + return nil + } e.complete() return nil } diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 08c5ef4..5052647 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -349,9 +349,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index b887725..5e917ab 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -20,6 +20,10 @@ spec: - jsonPath: .status.phase name: Phase type: string + - jsonPath: .status.statefulSet.name + name: StatefulSet + priority: 1 + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -218,9 +222,12 @@ spec: paused: description: |- Paused freezes reconciliation. While true, the controller does not - advance the lifecycle, start plans, or mutate derived resources. - In-flight tasks on the cluster run to completion but their results - are not polled until the field is cleared. + advance the lifecycle, start plans, or mutate derived resources + except the owned StatefulSet — which scales to Replicas=0 so pods + terminate. In-flight tasks on the cluster run to completion but + their results are not polled until the field is cleared. + Has no effect on nodes in PhaseFailed; delete and recreate to + recover from a failed node. type: boolean peers: description: |- @@ -950,6 +957,27 @@ spec: items: type: string type: array + statefulSet: + description: |- + StatefulSet references the StatefulSet the controller created for + this SeiNode. UID is the identity check: an STS with the expected + name but a different UID is not the one this controller created + (e.g., manual recreation out-of-band) and triggers replacement. + properties: + name: + description: Name of the StatefulSet (always equals the SeiNode + name). + type: string + uid: + description: |- + UID of the StatefulSet. Used to detect out-of-band recreation: + if a new StatefulSet appears with the same name but a different + UID, the controller knows it is not the one it created. + type: string + required: + - name + - uid + type: object type: object type: object served: true