From c0a66abcce3a9c0fc86e91f3cf5283583131f39a Mon Sep 17 00:00:00 2001 From: Predrag Knezevic Date: Fri, 9 Jan 2026 17:23:48 +0100 Subject: [PATCH] Support `ClusterExtension` progress deadline detection Adds an optional `.spec.progressDeadlineMinutes` field to `ClusterExtension` and `ClusterExtensionRevision` that defines the maximum time an extension version can take to roll out before being marked as failed. When configured, if a `ClusterExtensionRevision` fails to roll out within the specified duration, the `Progressing` condition is set to `False` with reason `ProgressDeadlineExceeded`. This signals that manual intervention is required and stops automatic retry attempts. Adds unit and e2e tests asserting the new behavior. --- api/v1/clusterextension_types.go | 11 ++ api/v1/clusterextensionrevision_types.go | 11 ++ api/v1/common_types.go | 5 +- api/v1/validation_test.go | 176 ++++++++++++++++++ docs/api-reference/olmv1-api-reference.md | 1 + ...ramework.io_clusterextensionrevisions.yaml | 10 + ...peratorframework.io_clusterextensions.yaml | 10 + .../operator-controller/applier/boxcutter.go | 6 +- .../applier/boxcutter_test.go | 60 ++++++ .../conditionsets/conditionsets.go | 1 + .../clusterextensionrevision_controller.go | 41 +++- manifests/experimental-e2e.yaml | 20 ++ manifests/experimental.yaml | 20 ++ test/e2e/features/install.feature | 32 ++++ test/e2e/steps/steps.go | 60 ++++++ .../v1.0.3/manifests/bundle.configmap.yaml | 7 + .../testoperator.clusterserviceversion.yaml | 151 +++++++++++++++ .../v1.0.3/metadata/annotations.yaml | 10 + .../test-catalog/v1/configs/catalog.yaml | 12 ++ 19 files changed, 640 insertions(+), 4 deletions(-) create mode 100644 api/v1/validation_test.go create mode 100644 testdata/images/bundles/test-operator/v1.0.3/manifests/bundle.configmap.yaml create mode 100644 testdata/images/bundles/test-operator/v1.0.3/manifests/testoperator.clusterserviceversion.yaml create mode 100644 
testdata/images/bundles/test-operator/v1.0.3/metadata/annotations.yaml diff --git a/api/v1/clusterextension_types.go b/api/v1/clusterextension_types.go index 2846b24c66..d150d49971 100644 --- a/api/v1/clusterextension_types.go +++ b/api/v1/clusterextension_types.go @@ -107,6 +107,17 @@ type ClusterExtensionSpec struct { // // +optional Config *ClusterExtensionConfig `json:"config,omitempty"` + + // progressDeadlineMinutes is an optional field that defines the maximum period + // of time in minutes after which an installation should be considered failed and + // require manual intervention. This functionality is disabled when no value + // is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). + // + // +kubebuilder:validation:Minimum:=10 + // +kubebuilder:validation:Maximum:=720 + // +optional + // + ProgressDeadlineMinutes int32 `json:"progressDeadlineMinutes,omitempty"` } const SourceTypeCatalog = "Catalog" diff --git a/api/v1/clusterextensionrevision_types.go b/api/v1/clusterextensionrevision_types.go index f7e7ff0642..0d733be61f 100644 --- a/api/v1/clusterextensionrevision_types.go +++ b/api/v1/clusterextensionrevision_types.go @@ -87,6 +87,17 @@ type ClusterExtensionRevisionSpec struct { // +listMapKey=name // +optional Phases []ClusterExtensionRevisionPhase `json:"phases,omitempty"` + + // progressDeadlineMinutes is an optional field that defines the maximum period + // of time in minutes after which an installation should be considered failed and + // require manual intervention. This functionality is disabled when no value + // is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). + // + // +kubebuilder:validation:Minimum:=10 + // +kubebuilder:validation:Maximum:=720 + // +optional + // + ProgressDeadlineMinutes int32 `json:"progressDeadlineMinutes,omitempty"` } // ClusterExtensionRevisionLifecycleState specifies the lifecycle state of the ClusterExtensionRevision. 
diff --git a/api/v1/common_types.go b/api/v1/common_types.go index 115836b10c..57e030f0b0 100644 --- a/api/v1/common_types.go +++ b/api/v1/common_types.go @@ -32,6 +32,7 @@ const ( ReasonDeprecated = "Deprecated" // Common reasons - ReasonSucceeded = "Succeeded" - ReasonFailed = "Failed" + ReasonSucceeded = "Succeeded" + ReasonFailed = "Failed" + ReasonProgressDeadlineExceeded = "ProgressDeadlineExceeded" ) diff --git a/api/v1/validation_test.go b/api/v1/validation_test.go new file mode 100644 index 0000000000..bbd755c3c0 --- /dev/null +++ b/api/v1/validation_test.go @@ -0,0 +1,176 @@ +package v1 + +import ( + "fmt" + "testing" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func TestValidate(t *testing.T) { + type args struct { + object any + skipDefaulting bool + } + type want struct { + valid bool + } + type testCase struct { + args args + want want + } + defaultExtensionSpec := func(s *ClusterExtensionSpec) *ClusterExtensionSpec { + s.Namespace = "ns" + s.ServiceAccount = ServiceAccountReference{ + Name: "sa", + } + s.Source = SourceConfig{ + SourceType: SourceTypeCatalog, + Catalog: &CatalogFilter{ + PackageName: "test", + }, + } + return s + } + defaultRevisionSpec := func(s *ClusterExtensionRevisionSpec) *ClusterExtensionRevisionSpec { + s.Revision = 1 + return s + } + c := newClient(t) + i := 0 + + for name, tc := range map[string]testCase{ + "ClusterExtension: invalid progress deadline < 10": { + args: args{ + object: ClusterExtensionSpec{ + ProgressDeadlineMinutes: 9, + }, + }, + want: want{valid: false}, + }, + "ClusterExtension: valid progress deadline = 10": { + args: args{ + object: ClusterExtensionSpec{ + ProgressDeadlineMinutes: 10, + }, + }, + want: want{valid: true}, + }, + "ClusterExtension: valid progress deadline = 360": { + args: args{ + object: ClusterExtensionSpec{ + ProgressDeadlineMinutes: 360, + }, + }, + want: want{valid: true}, + }, + 
"ClusterExtension: valid progress deadline = 720": { + args: args{ + object: ClusterExtensionSpec{ + ProgressDeadlineMinutes: 720, + }, + }, + want: want{valid: true}, + }, + "ClusterExtension: invalid progress deadline > 720": { + args: args{ + object: ClusterExtensionSpec{ + ProgressDeadlineMinutes: 721, + }, + }, + want: want{valid: false}, + }, + "ClusterExtension: no progress deadline set": { + args: args{ + object: ClusterExtensionSpec{}, + }, + want: want{valid: true}, + }, + "ClusterExtensionRevision: invalid progress deadline < 10": { + args: args{ + object: ClusterExtensionRevisionSpec{ + ProgressDeadlineMinutes: 9, + }, + }, + want: want{valid: false}, + }, + "ClusterExtensionRevision: valid progress deadline = 10": { + args: args{ + object: ClusterExtensionRevisionSpec{ + ProgressDeadlineMinutes: 10, + }, + }, + want: want{valid: true}, + }, + "ClusterExtensionRevision: valid progress deadline = 360": { + args: args{ + object: ClusterExtensionRevisionSpec{ + ProgressDeadlineMinutes: 360, + }, + }, + want: want{valid: true}, + }, + "ClusterExtensionRevision: valid progress deadline = 720": { + args: args{ + object: ClusterExtensionRevisionSpec{ + ProgressDeadlineMinutes: 720, + }, + }, + want: want{valid: true}, + }, + "ClusterExtensionRevision: invalid progress deadline > 720": { + args: args{ + object: ClusterExtensionRevisionSpec{ + ProgressDeadlineMinutes: 721, + }, + }, + want: want{valid: false}, + }, + "ClusterExtensionRevision: no progress deadline set": { + args: args{ + object: ClusterExtensionRevisionSpec{}, + }, + want: want{valid: true}, + }, + } { + t.Run(name, func(t *testing.T) { + var obj client.Object + switch s := tc.args.object.(type) { + case ClusterExtensionSpec: + ce := &ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("ce-%d", i), + }, + Spec: s, + } + if !tc.args.skipDefaulting { + defaultExtensionSpec(&ce.Spec) + } + obj = ce + case ClusterExtensionRevisionSpec: + cer := &ClusterExtensionRevision{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("cer-%d", i), + }, + Spec: s, + } + if !tc.args.skipDefaulting { + defaultRevisionSpec(&cer.Spec) + } + obj = cer + default: + t.Fatalf("unknown type %T", s) + } + i++ + err := c.Create(t.Context(), obj) + if tc.want.valid && err != nil { + t.Fatal("expected create to succeed, but got:", err) + } + if !tc.want.valid && !errors.IsInvalid(err) { + t.Fatal("expected create to fail due to invalid payload, but got:", err) + } + }) + } +} diff --git a/docs/api-reference/olmv1-api-reference.md b/docs/api-reference/olmv1-api-reference.md index 4feb0d6323..c9cff6e48f 100644 --- a/docs/api-reference/olmv1-api-reference.md +++ b/docs/api-reference/olmv1-api-reference.md @@ -344,6 +344,7 @@ _Appears in:_ | `source` _[SourceConfig](#sourceconfig)_ | source is required and selects the installation source of content for this ClusterExtension.
Set the sourceType field to perform the selection.
Catalog is currently the only implemented sourceType.
Setting sourceType to "Catalog" requires the catalog field to also be defined.
Below is a minimal example of a source definition (in yaml):
source:
sourceType: Catalog
catalog:
packageName: example-package | | Required: \{\}
| | `install` _[ClusterExtensionInstallConfig](#clusterextensioninstallconfig)_ | install is optional and configures installation options for the ClusterExtension,
such as the pre-flight check configuration. | | | | `config` _[ClusterExtensionConfig](#clusterextensionconfig)_ | config is optional and specifies bundle-specific configuration.
Configuration is bundle-specific and a bundle may provide a configuration schema.
When not specified, the default configuration of the resolved bundle is used.
config is validated against a configuration schema provided by the resolved bundle. If the bundle does not provide
a configuration schema the bundle is deemed to not be configurable. More information on how
to configure bundles can be found in the OLM documentation associated with your current OLM version. | | | +| `progressDeadlineMinutes` _integer_ | progressDeadlineMinutes is an optional field that defines the maximum period
of time in minutes after which an installation should be considered failed and
require manual intervention. This functionality is disabled when no value
is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours).
| | Maximum: 720
Minimum: 10
| #### ClusterExtensionStatus diff --git a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml index 1e435dc706..31e267b308 100644 --- a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml +++ b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensionrevisions.yaml @@ -166,6 +166,16 @@ spec: x-kubernetes-validations: - message: phases is immutable rule: self == oldSelf || oldSelf.size() == 0 + progressDeadlineMinutes: + description: |- + progressDeadlineMinutes is an optional field that defines the maximum period + of time in minutes after which an installation should be considered failed and + require manual intervention. This functionality is disabled when no value + is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). 
+ format: int32 + maximum: 720 + minimum: 10 + type: integer revision: description: |- revision is a required, immutable sequence number representing a specific revision diff --git a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml index 7194392b6b..66824fa12f 100644 --- a/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml +++ b/helm/olmv1/base/operator-controller/crd/experimental/olm.operatorframework.io_clusterextensions.yaml @@ -165,6 +165,16 @@ spec: rule: self == oldSelf - message: namespace must be a valid DNS1123 label rule: self.matches("^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + progressDeadlineMinutes: + description: |- + progressDeadlineMinutes is an optional field that defines the maximum period + of time in minutes after which an installation should be considered failed and + require manual intervention. This functionality is disabled when no value + is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). 
+ format: int32 + maximum: 720 + minimum: 10 + type: integer serviceAccount: description: |- serviceAccount specifies a ServiceAccount used to perform all interactions with the cluster diff --git a/internal/operator-controller/applier/boxcutter.go b/internal/operator-controller/applier/boxcutter.go index 63578e9cb8..a10f2cbba8 100644 --- a/internal/operator-controller/applier/boxcutter.go +++ b/internal/operator-controller/applier/boxcutter.go @@ -191,7 +191,7 @@ func (r *SimpleRevisionGenerator) buildClusterExtensionRevision( annotations[labels.ServiceAccountNameKey] = ext.Spec.ServiceAccount.Name annotations[labels.ServiceAccountNamespaceKey] = ext.Spec.Namespace - return &ocv1.ClusterExtensionRevision{ + cer := &ocv1.ClusterExtensionRevision{ ObjectMeta: metav1.ObjectMeta{ Annotations: annotations, Labels: map[string]string{ @@ -206,6 +206,10 @@ func (r *SimpleRevisionGenerator) buildClusterExtensionRevision( Phases: PhaseSort(objects), }, } + if p := ext.Spec.ProgressDeadlineMinutes; p > 0 { + cer.Spec.ProgressDeadlineMinutes = p + } + return cer } // BoxcutterStorageMigrator migrates ClusterExtensions from Helm-based storage to diff --git a/internal/operator-controller/applier/boxcutter_test.go b/internal/operator-controller/applier/boxcutter_test.go index 081747285c..df3c0a915a 100644 --- a/internal/operator-controller/applier/boxcutter_test.go +++ b/internal/operator-controller/applier/boxcutter_test.go @@ -22,6 +22,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/validation/field" k8scheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" @@ -327,6 +328,65 @@ func Test_SimpleRevisionGenerator_AppliesObjectLabelsAndRevisionAnnotations(t *t require.Equal(t, revAnnotations, rev.Annotations) } +func Test_SimpleRevisionGenerator_PropagatesProgressDeadlineMinutes(t *testing.T) { + 
r := &FakeManifestProvider{ + GetFn: func(b fs.FS, e *ocv1.ClusterExtension) ([]client.Object, error) { + return []client.Object{}, nil + }, + } + + b := applier.SimpleRevisionGenerator{ + Scheme: k8scheme.Scheme, + ManifestProvider: r, + } + + type args struct { + progressDeadlineMinutes *int32 + } + type want struct { + progressDeadlineMinutes int32 + } + type testCase struct { + args args + want want + } + for name, tc := range map[string]testCase{ + "propagates when set": { + args: args{ + progressDeadlineMinutes: ptr.To(int32(10)), + }, + want: want{ + progressDeadlineMinutes: 10, + }, + }, + "do not propagate when unset": { + want: want{ + progressDeadlineMinutes: 0, + }, + }, + } { + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-extension", + }, + Spec: ocv1.ClusterExtensionSpec{ + Namespace: "test-namespace", + ServiceAccount: ocv1.ServiceAccountReference{Name: "test-sa"}, + }, + } + empty := map[string]string{} + t.Run(name, func(t *testing.T) { + if pd := tc.args.progressDeadlineMinutes; pd != nil { + ext.Spec.ProgressDeadlineMinutes = *pd + } + + rev, err := b.GenerateRevision(t.Context(), fstest.MapFS{}, ext, empty, empty) + require.NoError(t, err) + require.Equal(t, tc.want.progressDeadlineMinutes, rev.Spec.ProgressDeadlineMinutes) + }) + } +} + func Test_SimpleRevisionGenerator_Failure(t *testing.T) { r := &FakeManifestProvider{ GetFn: func(b fs.FS, e *ocv1.ClusterExtension) ([]client.Object, error) { diff --git a/internal/operator-controller/conditionsets/conditionsets.go b/internal/operator-controller/conditionsets/conditionsets.go index 6c33b1c8f9..e72a95c2a4 100644 --- a/internal/operator-controller/conditionsets/conditionsets.go +++ b/internal/operator-controller/conditionsets/conditionsets.go @@ -41,4 +41,5 @@ var ConditionReasons = []string{ ocv1.ReasonRetrying, ocv1.ReasonAbsent, ocv1.ReasonRollingOut, + ocv1.ReasonProgressDeadlineExceeded, } diff --git 
a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index a2173a2fcf..c343a428b9 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "strings" + "sync" "time" appsv1 "k8s.io/api/apps/v1" @@ -27,6 +28,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -46,6 +48,9 @@ type ClusterExtensionRevisionReconciler struct { Client client.Client RevisionEngineFactory RevisionEngineFactory TrackingCache trackingCache + // track if we have queued up the reconciliation that detects eventual progress deadline issues + // keys is revision UUID, value is boolean + progressDeadlineCheckInFlight sync.Map } type trackingCache interface { @@ -74,6 +79,21 @@ func (c *ClusterExtensionRevisionReconciler) Reconcile(ctx context.Context, req reconciledRev := existingRev.DeepCopy() res, reconcileErr := c.reconcile(ctx, reconciledRev) + if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 { + cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) + isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded + if isStillProgressing { + timeout := time.Duration(pd) * time.Minute + if time.Since(existingRev.CreationTimestamp.Time) > timeout { + markAsNotProgressing(reconciledRev, ocv1.ReasonProgressDeadlineExceeded, fmt.Sprintf("Revision has not rolled out for %d minutes.", pd)) + reconcileErr = nil + res = ctrl.Result{} + } 
else if _, found := c.progressDeadlineCheckInFlight.Load(existingRev.GetUID()); !found && reconcileErr == nil { + c.progressDeadlineCheckInFlight.Store(existingRev.GetUID(), true) + res = ctrl.Result{RequeueAfter: timeout} + } + } + } // Do checks before any Update()s, as Update() may modify the resource structure! updateStatus := !equality.Semantic.DeepEqual(existingRev.Status, reconciledRev.Status) @@ -299,10 +319,29 @@ type Sourcerer interface { } func (c *ClusterExtensionRevisionReconciler) SetupWithManager(mgr ctrl.Manager) error { + skipProgressDeadlineExceededPredicate := predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + rev, ok := e.ObjectNew.(*ocv1.ClusterExtensionRevision) + if !ok { + return true + } + // allow deletions to happen + if !rev.DeletionTimestamp.IsZero() { + return true + } + if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded { + return false + } + return true + }, + } return ctrl.NewControllerManagedBy(mgr). For( &ocv1.ClusterExtensionRevision{}, - builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), + builder.WithPredicates( + predicate.ResourceVersionChangedPredicate{}, + skipProgressDeadlineExceededPredicate, + ), ). WatchesRawSource( c.TrackingCache.Source( diff --git a/manifests/experimental-e2e.yaml b/manifests/experimental-e2e.yaml index 8d56b5ad4d..edd046e6a8 100644 --- a/manifests/experimental-e2e.yaml +++ b/manifests/experimental-e2e.yaml @@ -778,6 +778,16 @@ spec: x-kubernetes-validations: - message: phases is immutable rule: self == oldSelf || oldSelf.size() == 0 + progressDeadlineMinutes: + description: |- + progressDeadlineMinutes is an optional field that defines the maximum period + of time in minutes after which an installation should be considered failed and + require manual intervention. 
This functionality is disabled when no value + is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). + format: int32 + maximum: 720 + minimum: 10 + type: integer revision: description: |- revision is a required, immutable sequence number representing a specific revision @@ -1052,6 +1062,16 @@ spec: rule: self == oldSelf - message: namespace must be a valid DNS1123 label rule: self.matches("^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + progressDeadlineMinutes: + description: |- + progressDeadlineMinutes is an optional field that defines the maximum period + of time in minutes after which an installation should be considered failed and + require manual intervention. This functionality is disabled when no value + is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). + format: int32 + maximum: 720 + minimum: 10 + type: integer serviceAccount: description: |- serviceAccount specifies a ServiceAccount used to perform all interactions with the cluster diff --git a/manifests/experimental.yaml b/manifests/experimental.yaml index 324a0fe4c5..8775523955 100644 --- a/manifests/experimental.yaml +++ b/manifests/experimental.yaml @@ -739,6 +739,16 @@ spec: x-kubernetes-validations: - message: phases is immutable rule: self == oldSelf || oldSelf.size() == 0 + progressDeadlineMinutes: + description: |- + progressDeadlineMinutes is an optional field that defines the maximum period + of time in minutes after which an installation should be considered failed and + require manual intervention. This functionality is disabled when no value + is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). 
+ format: int32 + maximum: 720 + minimum: 10 + type: integer revision: description: |- revision is a required, immutable sequence number representing a specific revision @@ -1013,6 +1023,16 @@ spec: rule: self == oldSelf - message: namespace must be a valid DNS1123 label rule: self.matches("^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") + progressDeadlineMinutes: + description: |- + progressDeadlineMinutes is an optional field that defines the maximum period + of time in minutes after which an installation should be considered failed and + require manual intervention. This functionality is disabled when no value + is provided. The minimum period is 10 minutes, and the maximum is 720 minutes (12 hours). + format: int32 + maximum: 720 + minimum: 10 + type: integer serviceAccount: description: |- serviceAccount specifies a ServiceAccount used to perform all interactions with the cluster diff --git a/test/e2e/features/install.feature b/test/e2e/features/install.feature index ba59ffe7dc..ab87b5f31c 100644 --- a/test/e2e/features/install.feature +++ b/test/e2e/features/install.feature @@ -297,3 +297,35 @@ Feature: Install ClusterExtension valid: true mutate: true """ + + @BoxcutterRuntime + @ProgressDeadline + Scenario: Report ClusterExtension as not progressing if the rollout does not complete within given timeout + Given min value for ClusterExtension .spec.progressDeadlineMinutes is set to 1 + And min value for ClusterExtensionRevision .spec.progressDeadlineMinutes is set to 1 + When ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + progressDeadlineMinutes: 1 + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + version: 1.0.3 + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + """ + Then ClusterExtensionRevision "${NAME}-1" reports Progressing as False with Reason 
ProgressDeadlineExceeded + And ClusterExtension reports Progressing as False with Reason ProgressDeadlineExceeded and Message: + """ + Revision has not rolled out for 1 minutes. + """ + And ClusterExtension reports Progressing transition between 1 and 2 minutes since its creation diff --git a/test/e2e/steps/steps.go b/test/e2e/steps/steps.go index e1a55934dd..8a06161975 100644 --- a/test/e2e/steps/steps.go +++ b/test/e2e/steps/steps.go @@ -60,6 +60,7 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutMsg) sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutReason) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionRevisionReportsConditionWithoutMsg) + sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) transition between (\d+) and (\d+) minutes since its creation$`, ClusterExtensionReportsConditionTransitionTime) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" is archived$`, ClusterExtensionRevisionIsArchived) sc.Step(`^(?i)resource "([^"]+)" is installed$`, ResourceAvailable) @@ -88,6 +89,8 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)operator "([^"]+)" target namespace is "([^"]+)"$`, OperatorTargetNamespace) sc.Step(`^(?i)Prometheus metrics are returned in the response$`, PrometheusMetricsAreReturned) + + sc.Step(`^(?i)min value for (ClusterExtension|ClusterExtensionRevision) ((?:\.[a-zA-Z]+)+) is set to (\d+)$`, SetCRDFieldMinValue) } func init() { @@ -314,6 +317,36 @@ func ClusterExtensionReportsConditionWithoutReason(ctx context.Context, conditio return waitForExtensionCondition(ctx, conditionType, conditionStatus, nil, nil) } +func ClusterExtensionReportsConditionTransitionTime(ctx context.Context, conditionType string, minMinutes, maxMinutes int) error 
{ + sc := scenarioCtx(ctx) + t := godog.T(ctx) + + // Get the ClusterExtension's creation timestamp and condition's lastTransitionTime + v, err := k8sClient("get", "clusterextension", sc.clusterExtensionName, "-o", + fmt.Sprintf("jsonpath={.metadata.creationTimestamp},{.status.conditions[?(@.type==\"%s\")].lastTransitionTime}", conditionType)) + require.NoError(t, err) + + parts := strings.Split(v, ",") + require.Len(t, parts, 2, "expected creationTimestamp and lastTransitionTime but got: %s", v) + + creationTimestamp, err := time.Parse(time.RFC3339, parts[0]) + require.NoError(t, err, "failed to parse creationTimestamp") + + lastTransitionTime, err := time.Parse(time.RFC3339, parts[1]) + require.NoError(t, err, "failed to parse lastTransitionTime") + + transitionDuration := lastTransitionTime.Sub(creationTimestamp) + minDuration := time.Duration(minMinutes) * time.Minute + maxDuration := time.Duration(maxMinutes) * time.Minute + + require.GreaterOrEqual(t, transitionDuration, minDuration, + "condition %s transitioned too early: %v since creation (expected >= %v)", conditionType, transitionDuration, minDuration) + require.LessOrEqual(t, transitionDuration, maxDuration, + "condition %s transitioned too late: %v since creation (expected <= %v)", conditionType, transitionDuration, maxDuration) + + return nil +} + func ClusterExtensionReportsActiveRevisions(ctx context.Context, rawRevisionNames string) error { sc := scenarioCtx(ctx) expectedRevisionNames := sets.New[string]() @@ -730,3 +763,30 @@ func MarkTestOperatorNotReady(ctx context.Context, state string) error { _, err = k8sClient("exec", podName, "-n", sc.namespace, "--", op, "/var/www/ready") return err } + +// SetCRDFieldMinValue patches a CRD to set the minimum value for a field. +// jsonPath is in the format ".spec.fieldName" and gets converted to the CRD schema path. 
+func SetCRDFieldMinValue(_ context.Context, resourceType, jsonPath string, minValue int) error { + var crdName string + switch resourceType { + case "ClusterExtension": + crdName = "clusterextensions.olm.operatorframework.io" + case "ClusterExtensionRevision": + crdName = "clusterextensionrevisions.olm.operatorframework.io" + default: + return fmt.Errorf("unsupported resource type: %s", resourceType) + } + + // Convert JSON path like ".spec.progressDeadlineMinutes" to CRD schema path + // e.g., ".spec.progressDeadlineMinutes" -> "properties/spec/properties/progressDeadlineMinutes" + parts := strings.Split(strings.TrimPrefix(jsonPath, "."), ".") + schemaParts := make([]string, 0, 2*len(parts)) + for _, part := range parts { + schemaParts = append(schemaParts, "properties", part) + } + patchPath := fmt.Sprintf("/spec/versions/0/schema/openAPIV3Schema/%s/minimum", strings.Join(schemaParts, "/")) + + patch := fmt.Sprintf(`[{"op": "replace", "path": "%s", "value": %d}]`, patchPath, minValue) + _, err := k8sClient("patch", "crd", crdName, "--type=json", "-p", patch) + return err +} diff --git a/testdata/images/bundles/test-operator/v1.0.3/manifests/bundle.configmap.yaml b/testdata/images/bundles/test-operator/v1.0.3/manifests/bundle.configmap.yaml new file mode 100644 index 0000000000..43b73e2c70 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.3/manifests/bundle.configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: wrong-test-configmap +# This ConfigMap intentionally carries an unknown top-level field so that the bundle can never roll out successfully, which lets tests reach the progress deadline timeout. +wrongfield: + name: "test-configmap" diff --git a/testdata/images/bundles/test-operator/v1.0.3/manifests/testoperator.clusterserviceversion.yaml b/testdata/images/bundles/test-operator/v1.0.3/manifests/testoperator.clusterserviceversion.yaml new file mode 100644 index 0000000000..a33f5a6c65 --- /dev/null +++ 
b/testdata/images/bundles/test-operator/v1.0.3/manifests/testoperator.clusterserviceversion.yaml @@ -0,0 +1,151 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: ClusterServiceVersion +metadata: + annotations: + alm-examples: |- + [ + { + "apiVersion": "olme2etests.olm.operatorframework.io/v1", + "kind": "OLME2ETests", + "metadata": { + "labels": { + "app.kubernetes.io/managed-by": "kustomize", + "app.kubernetes.io/name": "test" + }, + "name": "test-sample" + }, + "spec": null + } + ] + capabilities: Basic Install + createdAt: "2024-10-24T19:21:40Z" + operators.operatorframework.io/builder: operator-sdk-v1.34.1 + operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 + name: testoperator.v1.0.2 + namespace: placeholder +spec: + apiservicedefinitions: {} + customresourcedefinitions: + owned: + - description: Configures subsections of Alertmanager configuration specific to each namespace + displayName: OLME2ETest + kind: OLME2ETest + name: olme2etests.olm.operatorframework.io + version: v1 + description: OLM E2E Testing Operator with a wrong image ref + displayName: test-operator + icon: + - base64data: "" + mediatype: "" + install: + spec: + deployments: + - label: + app.kubernetes.io/component: controller + app.kubernetes.io/name: test-operator + app.kubernetes.io/version: 1.0.2 + name: test-operator + spec: + replicas: 1 + selector: + matchLabels: + app: olme2etest + template: + metadata: + labels: + app: olme2etest + spec: + terminationGracePeriodSeconds: 0 + volumes: + - name: scripts + configMap: + name: httpd-script + defaultMode: 0755 + containers: + - name: busybox-httpd-container + # This image ref is wrong and should trigger ImagePullBackOff condition + image: busybox:1.36 + serviceAccountName: simple-bundle-manager + clusterPermissions: + - rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + 
serviceAccountName: simple-bundle-manager + permissions: + - rules: + - apiGroups: + - "" + resources: + - configmaps + - serviceaccounts + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - get + - list + - create + - update + - delete + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + serviceAccountName: simple-bundle-manager + strategy: deployment + installModes: + - supported: false + type: OwnNamespace + - supported: true + type: SingleNamespace + - supported: false + type: MultiNamespace + - supported: true + type: AllNamespaces + keywords: + - registry + links: + - name: simple-bundle + url: https://simple-bundle.domain + maintainers: + - email: main#simple-bundle.domain + name: Simple Bundle + maturity: beta + provider: + name: Simple Bundle + url: https://simple-bundle.domain + version: 1.0.2 diff --git a/testdata/images/bundles/test-operator/v1.0.3/metadata/annotations.yaml b/testdata/images/bundles/test-operator/v1.0.3/metadata/annotations.yaml new file mode 100644 index 0000000000..404f0f4a34 --- /dev/null +++ b/testdata/images/bundles/test-operator/v1.0.3/metadata/annotations.yaml @@ -0,0 +1,10 @@ +annotations: + # Core bundle annotations. 
+ operators.operatorframework.io.bundle.mediatype.v1: registry+v1 + operators.operatorframework.io.bundle.manifests.v1: manifests/ + operators.operatorframework.io.bundle.metadata.v1: metadata/ + operators.operatorframework.io.bundle.package.v1: test + operators.operatorframework.io.bundle.channels.v1: beta + operators.operatorframework.io.metrics.builder: operator-sdk-v1.28.0 + operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 + operators.operatorframework.io.metrics.project_layout: unknown diff --git a/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml b/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml index 111c75f42c..012afbe830 100644 --- a/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml +++ b/testdata/images/catalogs/test-catalog/v1/configs/catalog.yaml @@ -8,6 +8,7 @@ package: test entries: - name: test-operator.1.0.0 - name: test-operator.1.0.2 + - name: test-operator.1.0.3 --- schema: olm.channel name: beta @@ -50,6 +51,17 @@ properties: packageName: test version: 1.0.2 --- +# Bundle with an invalid ConfigMap that ensures the rollout can never succeed - used to test progress deadline timeouts +schema: olm.bundle +name: test-operator.1.0.3 +package: test +image: docker-registry.operator-controller-e2e.svc.cluster.local:5000/bundles/registry-v1/test-operator:v1.0.3 +properties: + - type: olm.package + value: + packageName: test + version: 1.0.3 +--- schema: olm.bundle name: test-operator.1.2.0 package: test