Skip to content

Commit 40f8860

Browse files
committed
MON-4036: Add TelemeterClientConfig to ClusterMonitoring API
Migrate the telemeter-client configmap settings to a CRD field within ClusterMonitoringSpec in config/v1alpha1.

The new TelemeterClientConfig struct supports:
- nodeSelector: pod scheduling to specific nodes
- resources: compute resource requests and limits
- tolerations: pod tolerations for scheduling
- topologySpreadConstraints: pod distribution across topology domains

Signed-off-by: Daniel Mellado <dmellado@fedoraproject.org>
1 parent 7127010 commit 40f8860

8 files changed

Lines changed: 1539 additions & 1 deletion

File tree

config/v1alpha1/tests/clustermonitorings.config.openshift.io/ClusterMonitoringConfig.yaml

Lines changed: 253 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -577,3 +577,256 @@ tests:
577577
prometheusOperatorAdmissionWebhookConfig:
578578
resources: []
579579
expectedError: 'spec.prometheusOperatorAdmissionWebhookConfig.resources: Invalid value: 0: spec.prometheusOperatorAdmissionWebhookConfig.resources in body should have at least 1 items'
580+
- name: Should be able to create TelemeterClientConfig with valid resources
581+
initial: |
582+
apiVersion: config.openshift.io/v1alpha1
583+
kind: ClusterMonitoring
584+
spec:
585+
telemeterClientConfig:
586+
resources:
587+
- name: "cpu"
588+
request: "1m"
589+
limit: "200m"
590+
- name: "memory"
591+
request: "40Mi"
592+
limit: "200Mi"
593+
expected: |
594+
apiVersion: config.openshift.io/v1alpha1
595+
kind: ClusterMonitoring
596+
spec:
597+
telemeterClientConfig:
598+
resources:
599+
- name: "cpu"
600+
request: "1m"
601+
limit: "200m"
602+
- name: "memory"
603+
request: "40Mi"
604+
limit: "200Mi"
605+
- name: Should be able to create TelemeterClientConfig with valid tolerations
606+
initial: |
607+
apiVersion: config.openshift.io/v1alpha1
608+
kind: ClusterMonitoring
609+
spec:
610+
telemeterClientConfig:
611+
tolerations:
612+
- key: "node-role.kubernetes.io/infra"
613+
operator: "Exists"
614+
effect: "NoSchedule"
615+
expected: |
616+
apiVersion: config.openshift.io/v1alpha1
617+
kind: ClusterMonitoring
618+
spec:
619+
telemeterClientConfig:
620+
tolerations:
621+
- key: "node-role.kubernetes.io/infra"
622+
operator: "Exists"
623+
effect: "NoSchedule"
624+
- name: Should be able to create TelemeterClientConfig with valid topologySpreadConstraints
625+
initial: |
626+
apiVersion: config.openshift.io/v1alpha1
627+
kind: ClusterMonitoring
628+
spec:
629+
telemeterClientConfig:
630+
topologySpreadConstraints:
631+
- maxSkew: 1
632+
topologyKey: topology.kubernetes.io/zone
633+
whenUnsatisfiable: DoNotSchedule
634+
labelSelector:
635+
matchLabels:
636+
app: telemeter-client
637+
- maxSkew: 2
638+
topologyKey: kubernetes.io/hostname
639+
whenUnsatisfiable: ScheduleAnyway
640+
labelSelector:
641+
matchLabels:
642+
app: telemeter-client
643+
expected: |
644+
apiVersion: config.openshift.io/v1alpha1
645+
kind: ClusterMonitoring
646+
spec:
647+
telemeterClientConfig:
648+
topologySpreadConstraints:
649+
- maxSkew: 1
650+
topologyKey: topology.kubernetes.io/zone
651+
whenUnsatisfiable: DoNotSchedule
652+
labelSelector:
653+
matchLabels:
654+
app: telemeter-client
655+
- maxSkew: 2
656+
topologyKey: kubernetes.io/hostname
657+
whenUnsatisfiable: ScheduleAnyway
658+
labelSelector:
659+
matchLabels:
660+
app: telemeter-client
661+
- name: Should be able to create TelemeterClientConfig with all fields
662+
initial: |
663+
apiVersion: config.openshift.io/v1alpha1
664+
kind: ClusterMonitoring
665+
spec:
666+
telemeterClientConfig:
667+
nodeSelector:
668+
kubernetes.io/os: linux
669+
resources:
670+
- name: "cpu"
671+
request: "1m"
672+
- name: "memory"
673+
request: "40Mi"
674+
tolerations:
675+
- key: "node-role.kubernetes.io/infra"
676+
operator: "Exists"
677+
effect: "NoSchedule"
678+
topologySpreadConstraints:
679+
- maxSkew: 1
680+
topologyKey: topology.kubernetes.io/zone
681+
whenUnsatisfiable: DoNotSchedule
682+
expected: |
683+
apiVersion: config.openshift.io/v1alpha1
684+
kind: ClusterMonitoring
685+
spec:
686+
telemeterClientConfig:
687+
nodeSelector:
688+
kubernetes.io/os: linux
689+
resources:
690+
- name: "cpu"
691+
request: "1m"
692+
- name: "memory"
693+
request: "40Mi"
694+
tolerations:
695+
- key: "node-role.kubernetes.io/infra"
696+
operator: "Exists"
697+
effect: "NoSchedule"
698+
topologySpreadConstraints:
699+
- maxSkew: 1
700+
topologyKey: topology.kubernetes.io/zone
701+
whenUnsatisfiable: DoNotSchedule
702+
- name: Should reject TelemeterClientConfig with empty object
703+
initial: |
704+
apiVersion: config.openshift.io/v1alpha1
705+
kind: ClusterMonitoring
706+
spec:
707+
telemeterClientConfig: {}
708+
expectedError: 'spec.telemeterClientConfig: Invalid value: 0: spec.telemeterClientConfig in body should have at least 1 properties'
709+
- name: Should reject TelemeterClientConfig with duplicate resource names
710+
initial: |
711+
apiVersion: config.openshift.io/v1alpha1
712+
kind: ClusterMonitoring
713+
spec:
714+
telemeterClientConfig:
715+
resources:
716+
- name: "cpu"
717+
request: "100m"
718+
- name: "cpu"
719+
request: "200m"
720+
expectedError: "Duplicate value"
721+
- name: Should reject TelemeterClientConfig with duplicate topologySpreadConstraints
722+
initial: |
723+
apiVersion: config.openshift.io/v1alpha1
724+
kind: ClusterMonitoring
725+
spec:
726+
telemeterClientConfig:
727+
topologySpreadConstraints:
728+
- maxSkew: 1
729+
topologyKey: topology.kubernetes.io/zone
730+
whenUnsatisfiable: DoNotSchedule
731+
- maxSkew: 2
732+
topologyKey: topology.kubernetes.io/zone
733+
whenUnsatisfiable: DoNotSchedule
734+
expectedError: "Duplicate value"
735+
- name: Should reject TelemeterClientConfig with too many resources
736+
initial: |
737+
apiVersion: config.openshift.io/v1alpha1
738+
kind: ClusterMonitoring
739+
spec:
740+
telemeterClientConfig:
741+
resources:
742+
- name: "cpu"
743+
request: "100m"
744+
- name: "memory"
745+
request: "64Mi"
746+
- name: "hugepages-2Mi"
747+
request: "32Mi"
748+
- name: "hugepages-1Gi"
749+
request: "1Gi"
750+
- name: "ephemeral-storage"
751+
request: "1Gi"
752+
- name: "nvidia.com/gpu"
753+
request: "1"
754+
- name: "example.com/foo"
755+
request: "1"
756+
- name: "example.com/bar"
757+
request: "1"
758+
- name: "example.com/baz"
759+
request: "1"
760+
- name: "example.com/qux"
761+
request: "1"
762+
- name: "example.com/quux"
763+
request: "1"
764+
expectedError: 'spec.telemeterClientConfig.resources: Too many: 11: must have at most 10 items'
765+
- name: Should reject TelemeterClientConfig with limit less than request
766+
initial: |
767+
apiVersion: config.openshift.io/v1alpha1
768+
kind: ClusterMonitoring
769+
spec:
770+
telemeterClientConfig:
771+
resources:
772+
- name: "cpu"
773+
request: "500m"
774+
limit: "200m"
775+
expectedError: 'spec.telemeterClientConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
776+
- name: Should reject TelemeterClientConfig with too many topologySpreadConstraints
777+
initial: |
778+
apiVersion: config.openshift.io/v1alpha1
779+
kind: ClusterMonitoring
780+
spec:
781+
telemeterClientConfig:
782+
topologySpreadConstraints:
783+
- maxSkew: 1
784+
topologyKey: "zone1"
785+
whenUnsatisfiable: DoNotSchedule
786+
- maxSkew: 1
787+
topologyKey: "zone2"
788+
whenUnsatisfiable: DoNotSchedule
789+
- maxSkew: 1
790+
topologyKey: "zone3"
791+
whenUnsatisfiable: DoNotSchedule
792+
- maxSkew: 1
793+
topologyKey: "zone4"
794+
whenUnsatisfiable: DoNotSchedule
795+
- maxSkew: 1
796+
topologyKey: "zone5"
797+
whenUnsatisfiable: DoNotSchedule
798+
- maxSkew: 1
799+
topologyKey: "zone6"
800+
whenUnsatisfiable: DoNotSchedule
801+
- maxSkew: 1
802+
topologyKey: "zone7"
803+
whenUnsatisfiable: DoNotSchedule
804+
- maxSkew: 1
805+
topologyKey: "zone8"
806+
whenUnsatisfiable: DoNotSchedule
807+
- maxSkew: 1
808+
topologyKey: "zone9"
809+
whenUnsatisfiable: DoNotSchedule
810+
- maxSkew: 1
811+
topologyKey: "zone10"
812+
whenUnsatisfiable: DoNotSchedule
813+
- maxSkew: 1
814+
topologyKey: "zone11"
815+
whenUnsatisfiable: DoNotSchedule
816+
expectedError: 'spec.telemeterClientConfig.topologySpreadConstraints: Too many: 11: must have at most 10 items'
817+
- name: Should reject TelemeterClientConfig with empty topologySpreadConstraints array
818+
initial: |
819+
apiVersion: config.openshift.io/v1alpha1
820+
kind: ClusterMonitoring
821+
spec:
822+
telemeterClientConfig:
823+
topologySpreadConstraints: []
824+
expectedError: 'spec.telemeterClientConfig.topologySpreadConstraints: Invalid value: 0: spec.telemeterClientConfig.topologySpreadConstraints in body should have at least 1 items'
825+
- name: Should reject TelemeterClientConfig with empty resources array
826+
initial: |
827+
apiVersion: config.openshift.io/v1alpha1
828+
kind: ClusterMonitoring
829+
spec:
830+
telemeterClientConfig:
831+
resources: []
832+
expectedError: 'spec.telemeterClientConfig.resources: Invalid value: 0: spec.telemeterClientConfig.resources in body should have at least 1 items'

config/v1alpha1/types_cluster_monitoring.go

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,12 @@ type ClusterMonitoringSpec struct {
107107
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
108108
// +optional
109109
PrometheusOperatorAdmissionWebhookConfig PrometheusOperatorAdmissionWebhookConfig `json:"prometheusOperatorAdmissionWebhookConfig,omitempty,omitzero"`
110+
// telemeterClientConfig is an optional field that can be used to configure the Telemeter Client
111+
// component that runs in the openshift-monitoring namespace. The Telemeter Client collects
112+
// selected monitoring metrics and forwards them to Red Hat for telemetry purposes.
113+
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
114+
// +optional
115+
TelemeterClientConfig TelemeterClientConfig `json:"telemeterClientConfig,omitempty,omitzero"`
110116
}
111117

112118
// UserDefinedMonitoring config for user-defined projects.
@@ -566,6 +572,80 @@ type PrometheusOperatorAdmissionWebhookConfig struct {
566572
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
567573
}
568574

575+
// TelemeterClientConfig provides configuration options for the Telemeter Client component
576+
// that runs in the `openshift-monitoring` namespace. The Telemeter Client collects selected
577+
// monitoring metrics and forwards them to Red Hat for telemetry purposes.
578+
// Use this configuration to control pod scheduling and resource allocation. When specified, at least one of its fields must be set.
579+
// +kubebuilder:validation:MinProperties=1
580+
type TelemeterClientConfig struct {
581+
// nodeSelector defines the nodes on which the Telemeter Client Pods are scheduled.
582+
// nodeSelector is optional.
583+
//
584+
// When omitted, this means the user has no opinion and the platform is left
585+
// to choose reasonable defaults. These defaults are subject to change over time.
586+
// The current default value is `kubernetes.io/os: linux`.
587+
// When specified, nodeSelector must contain at least 1 entry and must not contain more than 10 entries.
588+
// +optional
589+
// +kubebuilder:validation:MinProperties=1
590+
// +kubebuilder:validation:MaxProperties=10
591+
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
592+
// resources defines the compute resource requests and limits for the Telemeter Client container.
593+
// This includes CPU, memory and HugePages constraints to help control scheduling and resource usage.
594+
// When not specified, defaults are used by the platform. For each resource, the limit must be greater than or equal to the request.
595+
// This field is optional.
596+
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
597+
// This is a simplified API that maps to Kubernetes ResourceRequirements.
598+
// The current default values are:
599+
// resources:
600+
// - name: cpu
601+
// request: 1m
602+
// limit: null
603+
// - name: memory
604+
// request: 40Mi
605+
// limit: null
606+
// Maximum length for this list is 10.
607+
// Minimum length for this list is 1.
608+
// Each resource name must be unique within this list.
609+
// +optional
610+
// +listType=map
611+
// +listMapKey=name
612+
// +kubebuilder:validation:MaxItems=10
613+
// +kubebuilder:validation:MinItems=1
614+
Resources []ContainerResource `json:"resources,omitempty"`
615+
// tolerations defines the tolerations applied to the Telemeter Client Pods.
616+
// tolerations is optional.
617+
//
618+
// When omitted, this means the user has no opinion and the platform is left
619+
// to choose reasonable defaults. These defaults are subject to change over time.
620+
// The current default is empty/unset.
621+
// Maximum length for this list is 10.
622+
// Minimum length for this list is 1.
623+
// +kubebuilder:validation:MaxItems=10
624+
// +kubebuilder:validation:MinItems=1
625+
// +listType=atomic
626+
// +optional
627+
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
628+
// topologySpreadConstraints defines rules for how Telemeter Client Pods should be distributed
629+
// across topology domains such as zones, nodes, or other user-defined labels.
630+
// topologySpreadConstraints is optional.
631+
// This helps improve high availability and resource efficiency by avoiding placing
632+
// too many replicas in the same failure domain.
633+
//
634+
// When omitted, this means no opinion and the platform is left to choose a default, which is subject to change over time.
635+
// This field maps directly to the `topologySpreadConstraints` field in the Pod spec.
636+
// The current default is an empty list.
637+
// Maximum length for this list is 10.
638+
// Minimum length for this list is 1.
639+
// Each entry must have a unique combination of topologyKey and whenUnsatisfiable.
640+
// +kubebuilder:validation:MaxItems=10
641+
// +kubebuilder:validation:MinItems=1
642+
// +listType=map
643+
// +listMapKey=topologyKey
644+
// +listMapKey=whenUnsatisfiable
645+
// +optional
646+
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
647+
}
648+
569649
// AuditProfile defines the audit log level for the Metrics Server.
570650
// +kubebuilder:validation:Enum=None;Metadata;Request;RequestResponse
571651
type AuditProfile string

0 commit comments

Comments
 (0)