From 30acbebfb346fa3af47c1546adb5d0bb4c36166c Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 12 May 2026 16:51:51 +0200 Subject: [PATCH 1/5] refactor: extract dereference/validate pipeline from reconcile_hive Move external resource resolution (product image, S3 connection, metadata database, OPA config) into controller::dereference module with its own error enum. Extract config validation and merging into validate_cluster(), which produces a ValidatedHiveCluster proving all product-config validation succeeded before any Kubernetes resources are created. The validated struct owns the resolved product image and per-role/ per-rolegroup merged configs. Existing build functions are unchanged and receive their parameters from the validated structs. Co-Authored-By: Claude Opus 4.6 --- rust/operator-binary/src/controller.rs | 348 ++++++++++-------- .../src/controller/dereference.rs | 95 +++++ rust/operator-binary/src/crd/mod.rs | 2 +- 3 files changed, 290 insertions(+), 155 deletions(-) create mode 100644 rust/operator-binary/src/controller/dereference.rs diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index ade37ef0..a10b0397 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -1,5 +1,7 @@ //! Ensures that `Pod`s are configured and running for each [`v1alpha1::HiveCluster`] +pub mod dereference; + use std::{ borrow::Cow, collections::{BTreeMap, HashMap}, @@ -35,8 +37,7 @@ use stackable_operator::{ cli::OperatorEnvironmentOptions, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, commons::{ - product_image_selection::{self, ResolvedProductImage}, - rbac::build_rbac_resources, + product_image_selection::ResolvedProductImage, rbac::build_rbac_resources, secret_class::SecretClassVolumeProvisionParts, }, constants::RESTART_CONTROLLER_ENABLED_LABEL, @@ -129,6 +130,31 @@ pub struct Ctx { pub operator_environment: OperatorEnvironmentOptions, } +/// Per-role configuration extracted during validation. +#[derive(Clone, Debug)] +pub struct ValidatedRoleConfig { + pub pdb: stackable_operator::commons::pdb::PdbConfig, + pub listener_class: String, +} + +/// Per-rolegroup configuration: the merged CRD config plus the product-config properties. +#[derive(Clone, Debug)] +pub struct ValidatedRoleGroupConfig { + pub merged_config: MetaStoreConfig, + pub product_config_properties: HashMap>, +} + +pub use crate::controller::dereference::DereferencedObjects; + +/// The validated cluster: proves that product-config validation and config merging +/// succeeded for every role and role group before any resources are created. +#[derive(Clone, Debug)] +pub struct ValidatedHiveCluster { + pub image: ResolvedProductImage, + pub role_groups: BTreeMap>, + pub role_configs: BTreeMap, +} + #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(strum::IntoStaticStr))] #[allow(clippy::enum_variant_names)] @@ -320,25 +346,15 @@ pub enum Error { #[snafu(display("failed to configure service"))] ServiceConfiguration { source: crate::service::Error }, - #[snafu(display("failed to resolve product image"))] - ResolveProductImage { - source: product_image_selection::Error, - }, - - #[snafu(display("invalid OpaConfig"))] - InvalidOpaConfig { - source: stackable_operator::commons::opa::Error, + #[snafu(display("failed to dereference cluster resources"))] + Dereference { + source: crate::controller::dereference::Error, }, #[snafu(display("failed to build TLS certificate SecretClass Volume"))] TlsCertSecretClassVolumeBuild { source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError, }, - - #[snafu(display("invalid metadata database connection"))] - InvalidMetadataDatabaseConnection { - source: stackable_operator::database_connections::Error, - }, } type Result = std::result::Result; @@ -348,55 +364,15 @@ impl ReconcilerError for Error { } } -pub async fn reconcile_hive( - hive: Arc>, - ctx: Arc, -) -> Result { - tracing::info!("Starting reconcile"); - let hive = hive - .0 - .as_ref() - .map_err(error_boundary::InvalidObject::clone) - .context(InvalidHiveClusterSnafu)?; - let client = &ctx.client; - let hive_namespace = hive.namespace().context(ObjectHasNoNamespaceSnafu)?; - - let resolved_product_image = hive - .spec - .image - .resolve( - CONTAINER_IMAGE_BASE_NAME, - &ctx.operator_environment.image_repository, - crate::built_info::PKG_VERSION, - ) - .context(ResolveProductImageSnafu)?; +fn validate_cluster( + hive: &v1alpha1::HiveCluster, + dereferenced: &DereferencedObjects, + product_config_manager: &ProductConfigManager, +) -> Result { let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; - let hive_role = HiveRole::MetaStore; - - let s3_connection_spec: Option = - if let Some(s3) = &hive.spec.cluster_config.s3 { - Some( - s3.clone() - .resolve( - client, - &hive.namespace().ok_or(Error::ObjectHasNoNamespace)?, - ) - .await - .context(ConfigureS3ConnectionSnafu)?, - ) - } else { - None - }; - - let metadata_database_connection_details = hive - .spec - .cluster_config - .metadata_database - .jdbc_connection_details("METADATA") - .context(InvalidMetadataDatabaseConnectionSnafu)?; let validated_config = validate_all_roles_and_groups_config( - &resolved_product_image.product_version, + &dereferenced.resolved_product_image.product_version, &transform_all_roles_to_config( hive, &[( @@ -414,17 +390,89 @@ pub async fn reconcile_hive( .into(), ) .context(GenerateProductConfigSnafu)?, - &ctx.product_config, + product_config_manager, false, false, ) .context(InvalidProductConfigSnafu)?; + let mut role_groups = BTreeMap::new(); + let mut role_configs = BTreeMap::new(); + let metastore_config = validated_config .get(&HiveRole::MetaStore.to_string()) .map(Cow::Borrowed) .unwrap_or_default(); + let hive_role = HiveRole::MetaStore; + + if let Some(HiveMetastoreRoleConfig { + common: GenericRoleConfig { + pod_disruption_budget: pdb, + }, + listener_class, + }) = hive.role_config(&hive_role) + { + role_configs.insert( + hive_role.clone(), + ValidatedRoleConfig { + pdb: pdb.clone(), + listener_class: listener_class.clone(), + }, + ); + } + + let mut group_configs = BTreeMap::new(); + for (rolegroup_name, rolegroup_config) in metastore_config.iter() { + let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); + + let merged_config = hive + .merged_config(&hive_role, &rolegroup) + .context(FailedToResolveResourceConfigSnafu)?; + + group_configs.insert( + rolegroup_name.clone(), + ValidatedRoleGroupConfig { + merged_config, + product_config_properties: rolegroup_config.clone(), + }, + ); + } + + role_groups.insert(hive_role, group_configs); + + Ok(ValidatedHiveCluster { + image: dereferenced.resolved_product_image.clone(), + role_groups, + role_configs, + }) +} + +pub async fn reconcile_hive( + hive: Arc>, + ctx: Arc, +) -> Result { + tracing::info!("Starting reconcile"); + let hive = hive + .0 + .as_ref() + .map_err(error_boundary::InvalidObject::clone) + .context(InvalidHiveClusterSnafu)?; + let client = &ctx.client; + let hive_namespace = hive.namespace().context(ObjectHasNoNamespaceSnafu)?; + + let dereferenced = crate::controller::dereference::dereference( + client, + hive, + CONTAINER_IMAGE_BASE_NAME, + &ctx.operator_environment.image_repository, + crate::built_info::PKG_VERSION, + ) + .await + .context(DereferenceSnafu)?; + + let validated = validate_cluster(hive, &dereferenced, &ctx.product_config)?; + let mut cluster_resources = ClusterResources::new( APP_NAME, OPERATOR_NAME, @@ -454,112 +502,104 @@ pub async fn reconcile_hive( .await .context(ApplyRoleBindingSnafu)?; - let hive_opa_config = match hive.get_opa_config() { - Some(opa_config) => Some( - HiveOpaConfig::from_opa_config(client, hive, opa_config) - .await - .context(InvalidOpaConfigSnafu)?, - ), - None => None, - }; - let mut ss_cond_builder = StatefulSetConditionBuilder::default(); - for (rolegroup_name, rolegroup_config) in metastore_config.iter() { - let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); - - let config = hive - .merged_config(&HiveRole::MetaStore, &rolegroup) - .context(FailedToResolveResourceConfigSnafu)?; - - let rg_metrics_service = - build_rolegroup_metrics_service(hive, &resolved_product_image, &rolegroup) - .context(ServiceConfigurationSnafu)?; - - let rg_headless_service = - build_rolegroup_headless_service(hive, &resolved_product_image, &rolegroup) - .context(ServiceConfigurationSnafu)?; - - let rg_configmap = build_metastore_rolegroup_config_map( - hive, - &hive_namespace, - &resolved_product_image, - &rolegroup, - rolegroup_config, - &metadata_database_connection_details, - s3_connection_spec.as_ref(), - &config, - &client.kubernetes_cluster_info, - hive_opa_config.as_ref(), - )?; - let rg_statefulset = build_metastore_rolegroup_statefulset( - hive, - &hive_role, - &resolved_product_image, - &rolegroup, - rolegroup_config, - &metadata_database_connection_details, - s3_connection_spec.as_ref(), - &config, - &rbac_sa.name_any(), - hive_opa_config.as_ref(), - )?; - - cluster_resources - .add(client, rg_metrics_service) + for (hive_role, role_group_configs) in &validated.role_groups { + if let Some(role_config) = validated.role_configs.get(hive_role) { + add_pdbs( + &role_config.pdb, + hive, + hive_role, + client, + &mut cluster_resources, + ) .await - .context(ApplyRoleGroupServiceSnafu { - rolegroup: rolegroup.clone(), - })?; + .context(FailedToCreatePdbSnafu)?; + } - cluster_resources - .add(client, rg_headless_service) - .await - .context(ApplyRoleGroupServiceSnafu { - rolegroup: rolegroup.clone(), - })?; + for (rolegroup_name, validated_rg_config) in role_group_configs { + let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); + + let rg_metrics_service = + build_rolegroup_metrics_service(hive, &validated.image, &rolegroup) + .context(ServiceConfigurationSnafu)?; + + let rg_headless_service = + build_rolegroup_headless_service(hive, &validated.image, &rolegroup) + .context(ServiceConfigurationSnafu)?; + + let rg_configmap = build_metastore_rolegroup_config_map( + hive, + &hive_namespace, + &validated.image, + &rolegroup, + &validated_rg_config.product_config_properties, + &dereferenced.metadata_database_connection_details, + dereferenced.s3_connection_spec.as_ref(), + &validated_rg_config.merged_config, + &client.kubernetes_cluster_info, + dereferenced.hive_opa_config.as_ref(), + )?; + let rg_statefulset = build_metastore_rolegroup_statefulset( + hive, + hive_role, + &validated.image, + &rolegroup, + &validated_rg_config.product_config_properties, + &dereferenced.metadata_database_connection_details, + dereferenced.s3_connection_spec.as_ref(), + &validated_rg_config.merged_config, + &rbac_sa.name_any(), + dereferenced.hive_opa_config.as_ref(), + )?; - cluster_resources - .add(client, rg_configmap) - .await - .context(ApplyRoleGroupConfigSnafu { - rolegroup: rolegroup.clone(), - })?; - - // Note: The StatefulSet needs to be applied after all ConfigMaps and Secrets it mounts - // to prevent unnecessary Pod restarts. - // See https://github.com/stackabletech/commons-operator/issues/111 for details. - ss_cond_builder.add( cluster_resources - .add(client, rg_statefulset) + .add(client, rg_metrics_service) .await - .context(ApplyRoleGroupStatefulSetSnafu { + .context(ApplyRoleGroupServiceSnafu { rolegroup: rolegroup.clone(), - })?, - ); - } + })?; - let role_config = hive.role_config(&hive_role); - if let Some(HiveMetastoreRoleConfig { - common: GenericRoleConfig { - pod_disruption_budget: pdb, - }, - .. - }) = role_config - { - add_pdbs(pdb, hive, &hive_role, client, &mut cluster_resources) - .await - .context(FailedToCreatePdbSnafu)?; + cluster_resources + .add(client, rg_headless_service) + .await + .context(ApplyRoleGroupServiceSnafu { + rolegroup: rolegroup.clone(), + })?; + + cluster_resources.add(client, rg_configmap).await.context( + ApplyRoleGroupConfigSnafu { + rolegroup: rolegroup.clone(), + }, + )?; + + // Note: The StatefulSet needs to be applied after all ConfigMaps and Secrets it mounts + // to prevent unnecessary Pod restarts. + // See https://github.com/stackabletech/commons-operator/issues/111 for details. + ss_cond_builder.add( + cluster_resources + .add(client, rg_statefulset) + .await + .context(ApplyRoleGroupStatefulSetSnafu { + rolegroup: rolegroup.clone(), + })?, + ); + } } // std's SipHasher is deprecated, and DefaultHasher is unstable across Rust releases. // We don't /need/ stability, but it's still nice to avoid spurious changes where possible. let mut discovery_hash = FnvHasher::with_key(0); - if let Some(HiveMetastoreRoleConfig { listener_class, .. }) = role_config { - let role_listener: Listener = - build_role_listener(hive, &resolved_product_image, &hive_role, listener_class) - .context(ListenerConfigurationSnafu)?; + let hive_role = HiveRole::MetaStore; + if let Some(role_config) = validated.role_configs.get(&hive_role) { + let role_listener: Listener = build_role_listener( + hive, + &validated.image, + &hive_role, + &role_config.listener_class, + ) + .context(ListenerConfigurationSnafu)?; let listener = cluster_resources.add(client, role_listener).await.context( ApplyGroupListenerSnafu { role: hive_role.to_string(), @@ -570,7 +610,7 @@ pub async fn reconcile_hive( hive, hive, hive_role, - &resolved_product_image, + &validated.image, None, listener, ) diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs new file mode 100644 index 00000000..79bcfed7 --- /dev/null +++ b/rust/operator-binary/src/controller/dereference.rs @@ -0,0 +1,95 @@ +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + commons::product_image_selection::{self, ResolvedProductImage}, + crd::s3, + database_connections::drivers::jdbc::JdbcDatabaseConnectionDetails, + kube::ResourceExt, +}; + +use crate::{config::opa::HiveOpaConfig, crd::v1alpha1}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to resolve product image"))] + ResolveProductImage { + source: product_image_selection::Error, + }, + + #[snafu(display("object defines no namespace"))] + ObjectHasNoNamespace, + + #[snafu(display("failed to configure S3 connection"))] + ConfigureS3Connection { + source: s3::v1alpha1::ConnectionError, + }, + + #[snafu(display("invalid metadata database connection"))] + InvalidMetadataDatabaseConnection { + source: stackable_operator::database_connections::Error, + }, + + #[snafu(display("invalid OpaConfig"))] + InvalidOpaConfig { + source: stackable_operator::commons::opa::Error, + }, +} + +/// External references resolved during the dereference step. +pub struct DereferencedObjects { + pub resolved_product_image: ResolvedProductImage, + pub s3_connection_spec: Option, + pub metadata_database_connection_details: JdbcDatabaseConnectionDetails, + pub hive_opa_config: Option, +} + +pub async fn dereference( + client: &stackable_operator::client::Client, + hive: &v1alpha1::HiveCluster, + image_base_name: &str, + image_repository: &str, + pkg_version: &str, +) -> Result { + let resolved_product_image = hive + .spec + .image + .resolve(image_base_name, image_repository, pkg_version) + .context(ResolveProductImageSnafu)?; + + let s3_connection_spec: Option = + if let Some(s3) = &hive.spec.cluster_config.s3 { + Some( + s3.clone() + .resolve( + client, + &hive.namespace().ok_or(Error::ObjectHasNoNamespace)?, + ) + .await + .context(ConfigureS3ConnectionSnafu)?, + ) + } else { + None + }; + + let metadata_database_connection_details = hive + .spec + .cluster_config + .metadata_database + .jdbc_connection_details("METADATA") + .context(InvalidMetadataDatabaseConnectionSnafu)?; + + let hive_opa_config = match hive.get_opa_config() { + Some(opa_config) => Some( + HiveOpaConfig::from_opa_config(client, hive, opa_config) + .await + .context(InvalidOpaConfigSnafu)?, + ), + None => None, + }; + + Ok(DereferencedObjects { + resolved_product_image, + s3_connection_spec, + metadata_database_connection_details, + hive_opa_config, + }) +} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 7e4edc63..2b046f18 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -383,7 +383,7 @@ pub struct HdfsConnection { pub config_map: String, } -#[derive(Display, EnumString, EnumIter)] +#[derive(Clone, Debug, Display, EnumString, EnumIter, Eq, Hash, Ord, PartialEq, PartialOrd)] #[strum(serialize_all = "camelCase")] pub enum HiveRole { #[strum(serialize = "metastore")] From 6da580cae18b4f37df3afd50aa37aeb5a1ec21f5 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 12 May 2026 17:15:50 +0200 Subject: [PATCH 2/5] refactor: extract validate_cluster into controller::validate module Co-Authored-By: Claude Opus 4.6 --- rust/operator-binary/src/controller.rs | 138 +---------------- .../src/controller/validate.rs | 144 ++++++++++++++++++ 2 files changed, 152 insertions(+), 130 deletions(-) create mode 100644 rust/operator-binary/src/controller/validate.rs diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index a10b0397..eb1f9023 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -1,9 +1,9 @@ //! Ensures that `Pod`s are configured and running for each [`v1alpha1::HiveCluster`] pub mod dereference; +pub mod validate; use std::{ - borrow::Cow, collections::{BTreeMap, HashMap}, hash::Hasher, sync::Arc, @@ -64,7 +64,6 @@ use stackable_operator::{ kvp::{Labels, ObjectLabels}, logging::controller::ReconcilerError, memory::{BinaryMultiple, MemoryQuantity}, - product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ self, framework::{ @@ -75,7 +74,7 @@ use stackable_operator::{ CustomContainerLogConfig, }, }, - role_utils::{GenericRoleConfig, RoleGroupRef}, + role_utils::RoleGroupRef, shared::time::Duration, status::condition::{ compute_conditions, operations::ClusterOperationsConditionBuilder, @@ -101,7 +100,7 @@ use crate::{ STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR_NAME, STACKABLE_LOG_DIR, STACKABLE_LOG_DIR_NAME, databases::{MetadataDatabaseConnection, derby_driver_class}, - v1alpha1::{self, HiveMetastoreRoleConfig}, + v1alpha1, }, discovery::{self}, kerberos::{ @@ -130,31 +129,6 @@ pub struct Ctx { pub operator_environment: OperatorEnvironmentOptions, } -/// Per-role configuration extracted during validation. -#[derive(Clone, Debug)] -pub struct ValidatedRoleConfig { - pub pdb: stackable_operator::commons::pdb::PdbConfig, - pub listener_class: String, -} - -/// Per-rolegroup configuration: the merged CRD config plus the product-config properties. -#[derive(Clone, Debug)] -pub struct ValidatedRoleGroupConfig { - pub merged_config: MetaStoreConfig, - pub product_config_properties: HashMap>, -} - -pub use crate::controller::dereference::DereferencedObjects; - -/// The validated cluster: proves that product-config validation and config merging -/// succeeded for every role and role group before any resources are created. -#[derive(Clone, Debug)] -pub struct ValidatedHiveCluster { - pub image: ResolvedProductImage, - pub role_groups: BTreeMap>, - pub role_configs: BTreeMap, -} - #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(strum::IntoStaticStr))] #[allow(clippy::enum_variant_names)] @@ -162,9 +136,6 @@ pub enum Error { #[snafu(display("object defines no namespace"))] ObjectHasNoNamespace, - #[snafu(display("object defines no metastore role"))] - NoMetaStoreRole, - #[snafu(display("failed to apply Service for {rolegroup}"))] ApplyRoleGroupService { source: stackable_operator::cluster_resources::Error, @@ -189,16 +160,6 @@ pub enum Error { rolegroup: RoleGroupRef, }, - #[snafu(display("failed to generate product config"))] - GenerateProductConfig { - source: stackable_operator::product_config_utils::Error, - }, - - #[snafu(display("invalid product config"))] - InvalidProductConfig { - source: stackable_operator::product_config_utils::Error, - }, - #[snafu(display("object is missing metadata to build owner reference"))] ObjectMissingMetadataForOwnerRef { source: stackable_operator::builder::meta::Error, @@ -227,9 +188,6 @@ pub enum Error { ))] S3TlsNoVerificationNotSupported, - #[snafu(display("failed to resolve and merge resource config for role and role group"))] - FailedToResolveResourceConfig { source: crate::crd::Error }, - #[snafu(display("failed to create hive container [{name}]"))] FailedToCreateHiveContainer { source: stackable_operator::builder::pod::container::Error, @@ -351,6 +309,9 @@ pub enum Error { source: crate::controller::dereference::Error, }, + #[snafu(display("failed to validate cluster configuration"))] + Validate { source: validate::Error }, + #[snafu(display("failed to build TLS certificate SecretClass Volume"))] TlsCertSecretClassVolumeBuild { source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError, @@ -364,90 +325,6 @@ impl ReconcilerError for Error { } } -fn validate_cluster( - hive: &v1alpha1::HiveCluster, - dereferenced: &DereferencedObjects, - product_config_manager: &ProductConfigManager, -) -> Result { - let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; - - let validated_config = validate_all_roles_and_groups_config( - &dereferenced.resolved_product_image.product_version, - &transform_all_roles_to_config( - hive, - &[( - HiveRole::MetaStore.to_string(), - ( - vec![ - PropertyNameKind::Env, - PropertyNameKind::Cli, - PropertyNameKind::File(HIVE_SITE_XML.to_string()), - PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), - ], - role.clone(), - ), - )] - .into(), - ) - .context(GenerateProductConfigSnafu)?, - product_config_manager, - false, - false, - ) - .context(InvalidProductConfigSnafu)?; - - let mut role_groups = BTreeMap::new(); - let mut role_configs = BTreeMap::new(); - - let metastore_config = validated_config - .get(&HiveRole::MetaStore.to_string()) - .map(Cow::Borrowed) - .unwrap_or_default(); - - let hive_role = HiveRole::MetaStore; - - if let Some(HiveMetastoreRoleConfig { - common: GenericRoleConfig { - pod_disruption_budget: pdb, - }, - listener_class, - }) = hive.role_config(&hive_role) - { - role_configs.insert( - hive_role.clone(), - ValidatedRoleConfig { - pdb: pdb.clone(), - listener_class: listener_class.clone(), - }, - ); - } - - let mut group_configs = BTreeMap::new(); - for (rolegroup_name, rolegroup_config) in metastore_config.iter() { - let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); - - let merged_config = hive - .merged_config(&hive_role, &rolegroup) - .context(FailedToResolveResourceConfigSnafu)?; - - group_configs.insert( - rolegroup_name.clone(), - ValidatedRoleGroupConfig { - merged_config, - product_config_properties: rolegroup_config.clone(), - }, - ); - } - - role_groups.insert(hive_role, group_configs); - - Ok(ValidatedHiveCluster { - image: dereferenced.resolved_product_image.clone(), - role_groups, - role_configs, - }) -} - pub async fn reconcile_hive( hive: Arc>, ctx: Arc, @@ -471,7 +348,8 @@ pub async fn reconcile_hive( .await .context(DereferenceSnafu)?; - let validated = validate_cluster(hive, &dereferenced, &ctx.product_config)?; + let validated = validate::validate_cluster(hive, &dereferenced, &ctx.product_config) + .context(ValidateSnafu)?; let mut cluster_resources = ClusterResources::new( APP_NAME, diff --git a/rust/operator-binary/src/controller/validate.rs b/rust/operator-binary/src/controller/validate.rs new file mode 100644 index 00000000..449742fb --- /dev/null +++ b/rust/operator-binary/src/controller/validate.rs @@ -0,0 +1,144 @@ +use std::{ + borrow::Cow, + collections::{BTreeMap, HashMap}, +}; + +use product_config::{ProductConfigManager, types::PropertyNameKind}; +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + commons::product_image_selection::ResolvedProductImage, + product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, + role_utils::GenericRoleConfig, +}; + +use super::dereference::DereferencedObjects; +use crate::crd::{ + HIVE_SITE_XML, HiveRole, JVM_SECURITY_PROPERTIES_FILE, MetaStoreConfig, + v1alpha1::{self, HiveMetastoreRoleConfig}, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("object defines no metastore role"))] + NoMetaStoreRole, + + #[snafu(display("failed to generate product config"))] + GenerateProductConfig { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("invalid product config"))] + InvalidProductConfig { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("failed to resolve and merge resource config for role and role group"))] + FailedToResolveResourceConfig { source: crate::crd::Error }, +} + +/// Per-role configuration extracted during validation. +#[derive(Clone, Debug)] +pub struct ValidatedRoleConfig { + pub pdb: stackable_operator::commons::pdb::PdbConfig, + pub listener_class: String, +} + +/// Per-rolegroup configuration: the merged CRD config plus the product-config properties. +#[derive(Clone, Debug)] +pub struct ValidatedRoleGroupConfig { + pub merged_config: MetaStoreConfig, + pub product_config_properties: HashMap>, +} + +/// The validated cluster: proves that product-config validation and config merging +/// succeeded for every role and role group before any resources are created. +#[derive(Clone, Debug)] +pub struct ValidatedHiveCluster { + pub image: ResolvedProductImage, + pub role_groups: BTreeMap>, + pub role_configs: BTreeMap, +} + +pub fn validate_cluster( + hive: &v1alpha1::HiveCluster, + dereferenced: &DereferencedObjects, + product_config_manager: &ProductConfigManager, +) -> Result { + let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; + + let validated_config = validate_all_roles_and_groups_config( + &dereferenced.resolved_product_image.product_version, + &transform_all_roles_to_config( + hive, + &[( + HiveRole::MetaStore.to_string(), + ( + vec![ + PropertyNameKind::Env, + PropertyNameKind::Cli, + PropertyNameKind::File(HIVE_SITE_XML.to_string()), + PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), + ], + role.clone(), + ), + )] + .into(), + ) + .context(GenerateProductConfigSnafu)?, + product_config_manager, + false, + false, + ) + .context(InvalidProductConfigSnafu)?; + + let mut role_groups = BTreeMap::new(); + let mut role_configs = BTreeMap::new(); + + let metastore_config = validated_config + .get(&HiveRole::MetaStore.to_string()) + .map(Cow::Borrowed) + .unwrap_or_default(); + + let hive_role = HiveRole::MetaStore; + + if let Some(HiveMetastoreRoleConfig { + common: GenericRoleConfig { + pod_disruption_budget: pdb, + }, + listener_class, + }) = hive.role_config(&hive_role) + { + role_configs.insert( + hive_role.clone(), + ValidatedRoleConfig { + pdb: pdb.clone(), + listener_class: listener_class.clone(), + }, + ); + } + + let mut group_configs = BTreeMap::new(); + for (rolegroup_name, rolegroup_config) in metastore_config.iter() { + let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); + + let merged_config = hive + .merged_config(&hive_role, &rolegroup) + .context(FailedToResolveResourceConfigSnafu)?; + + group_configs.insert( + rolegroup_name.clone(), + ValidatedRoleGroupConfig { + merged_config, + product_config_properties: rolegroup_config.clone(), + }, + ); + } + + role_groups.insert(hive_role, group_configs); + + Ok(ValidatedHiveCluster { + image: dereferenced.resolved_product_image.clone(), + role_groups, + role_configs, + }) +} From 63744a2f1d36c77c2e0ccd11c5845245dc14d487 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 13 May 2026 11:41:21 +0200 Subject: [PATCH 3/5] refactor: align error variant naming with airflow and hbase operators Rename FailedToResolveResourceConfig to FailedToResolveConfig and fix OPA error display string to match the convention used across all three dereference/validate extraction PRs. Co-Authored-By: Claude Opus 4.6 --- rust/operator-binary/src/controller/dereference.rs | 2 +- rust/operator-binary/src/controller/validate.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs index 79bcfed7..fb0bdf2b 100644 --- a/rust/operator-binary/src/controller/dereference.rs +++ b/rust/operator-binary/src/controller/dereference.rs @@ -28,7 +28,7 @@ pub enum Error { source: stackable_operator::database_connections::Error, }, - #[snafu(display("invalid OpaConfig"))] + #[snafu(display("invalid OPA configuration"))] InvalidOpaConfig { source: stackable_operator::commons::opa::Error, }, diff --git a/rust/operator-binary/src/controller/validate.rs b/rust/operator-binary/src/controller/validate.rs index 449742fb..7ceeaee8 100644 --- a/rust/operator-binary/src/controller/validate.rs +++ b/rust/operator-binary/src/controller/validate.rs @@ -32,8 +32,8 @@ pub enum Error { source: stackable_operator::product_config_utils::Error, }, - #[snafu(display("failed to resolve and merge resource config for role and role group"))] - FailedToResolveResourceConfig { source: crate::crd::Error }, + #[snafu(display("failed to resolve and merge config for role and role group"))] + FailedToResolveConfig { source: crate::crd::Error }, } /// Per-role configuration extracted during validation. @@ -123,7 +123,7 @@ pub fn validate_cluster( let merged_config = hive .merged_config(&hive_role, &rolegroup) - .context(FailedToResolveResourceConfigSnafu)?; + .context(FailedToResolveConfigSnafu)?; group_configs.insert( rolegroup_name.clone(), From 1b613089f5553af03037a06bd6da285570a526bc Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 13 May 2026 16:58:56 +0200 Subject: [PATCH 4/5] refactor: move product image resolution from dereference to validate Image resolution is a pure computation, not an I/O dereference, so it belongs in validate_cluster alongside the other config validation. This aligns with the pattern used by the trino and airflow operators. Co-Authored-By: Claude Opus 4.6 --- rust/operator-binary/src/controller.rs | 14 ++++++------ .../src/controller/dereference.rs | 21 +----------------- .../src/controller/validate.rs | 22 ++++++++++++++----- 3 files changed, 25 insertions(+), 32 deletions(-) diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index eb1f9023..678812c1 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -338,18 +338,18 @@ pub async fn reconcile_hive( let client = &ctx.client; let hive_namespace = hive.namespace().context(ObjectHasNoNamespaceSnafu)?; - let dereferenced = crate::controller::dereference::dereference( - client, + let dereferenced = crate::controller::dereference::dereference(client, hive) + .await + .context(DereferenceSnafu)?; + + let validated = validate::validate_cluster( hive, CONTAINER_IMAGE_BASE_NAME, &ctx.operator_environment.image_repository, crate::built_info::PKG_VERSION, + &ctx.product_config, ) - .await - .context(DereferenceSnafu)?; - - let validated = validate::validate_cluster(hive, &dereferenced, &ctx.product_config) - .context(ValidateSnafu)?; + .context(ValidateSnafu)?; let mut cluster_resources = ClusterResources::new( APP_NAME, diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs index fb0bdf2b..77e331aa 100644 --- a/rust/operator-binary/src/controller/dereference.rs +++ b/rust/operator-binary/src/controller/dereference.rs @@ -1,20 +1,12 @@ use snafu::{ResultExt, Snafu}; use stackable_operator::{ - commons::product_image_selection::{self, ResolvedProductImage}, - crd::s3, - database_connections::drivers::jdbc::JdbcDatabaseConnectionDetails, - kube::ResourceExt, + crd::s3, database_connections::drivers::jdbc::JdbcDatabaseConnectionDetails, kube::ResourceExt, }; use crate::{config::opa::HiveOpaConfig, crd::v1alpha1}; #[derive(Snafu, Debug)] pub enum Error { - #[snafu(display("failed to resolve product image"))] - ResolveProductImage { - source: product_image_selection::Error, - }, - #[snafu(display("object defines no namespace"))] ObjectHasNoNamespace, @@ -36,7 +28,6 @@ pub enum Error { /// External references resolved during the dereference step. pub struct DereferencedObjects { - pub resolved_product_image: ResolvedProductImage, pub s3_connection_spec: Option, pub metadata_database_connection_details: JdbcDatabaseConnectionDetails, pub hive_opa_config: Option, @@ -45,16 +36,7 @@ pub struct DereferencedObjects { pub async fn dereference( client: &stackable_operator::client::Client, hive: &v1alpha1::HiveCluster, - image_base_name: &str, - image_repository: &str, - pkg_version: &str, ) -> Result { - let resolved_product_image = hive - .spec - .image - .resolve(image_base_name, image_repository, pkg_version) - .context(ResolveProductImageSnafu)?; - let s3_connection_spec: Option = if let Some(s3) = &hive.spec.cluster_config.s3 { Some( @@ -87,7 +69,6 @@ pub async fn dereference( }; Ok(DereferencedObjects { - resolved_product_image, s3_connection_spec, metadata_database_connection_details, hive_opa_config, diff --git a/rust/operator-binary/src/controller/validate.rs b/rust/operator-binary/src/controller/validate.rs index 7ceeaee8..728fce44 100644 --- a/rust/operator-binary/src/controller/validate.rs +++ b/rust/operator-binary/src/controller/validate.rs @@ -6,12 +6,11 @@ use std::{ use product_config::{ProductConfigManager, types::PropertyNameKind}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ - commons::product_image_selection::ResolvedProductImage, + commons::product_image_selection::{self, ResolvedProductImage}, product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, role_utils::GenericRoleConfig, }; -use super::dereference::DereferencedObjects; use crate::crd::{ HIVE_SITE_XML, HiveRole, JVM_SECURITY_PROPERTIES_FILE, MetaStoreConfig, v1alpha1::{self, HiveMetastoreRoleConfig}, @@ -19,6 +18,11 @@ use crate::crd::{ #[derive(Snafu, Debug)] pub enum Error { + #[snafu(display("failed to resolve product image"))] + ResolveProductImage { + source: product_image_selection::Error, + }, + #[snafu(display("object defines no metastore role"))] NoMetaStoreRole, @@ -61,13 +65,21 @@ pub struct ValidatedHiveCluster { pub fn validate_cluster( hive: &v1alpha1::HiveCluster, - dereferenced: &DereferencedObjects, + image_base_name: &str, + image_repository: &str, + pkg_version: &str, product_config_manager: &ProductConfigManager, ) -> Result { + let resolved_product_image = hive + .spec + .image + .resolve(image_base_name, image_repository, pkg_version) + .context(ResolveProductImageSnafu)?; + let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; let validated_config = validate_all_roles_and_groups_config( - &dereferenced.resolved_product_image.product_version, + &resolved_product_image.product_version, &transform_all_roles_to_config( hive, &[( @@ -137,7 +149,7 @@ pub fn validate_cluster( role_groups.insert(hive_role, group_configs); Ok(ValidatedHiveCluster { - image: dereferenced.resolved_product_image.clone(), + image: resolved_product_image, role_groups, role_configs, }) From 213bac34d81a8e87436b5b563ca9bdba07229e83 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 13 May 2026 17:23:16 +0200 Subject: [PATCH 5/5] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e08b815..982dc84b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,13 @@ All notable changes to this project will be documented in this file. This means you need to replace your simple database connection string with a typed struct. This struct is consistent between different CRDs, so that you can easily copy/paste it between stacklets. Read on the [Hive database documentation](https://docs.stackable.tech/home/nightly/hive/usage-guide/database-driver) for details ([#674]). +- Internal operator refactoring: introduce dereference() and validate() steps in the reconciler ([#707]). [#674]: https://github.com/stackabletech/hive-operator/pull/674 [#693]: https://github.com/stackabletech/hive-operator/pull/693 [#695]: https://github.com/stackabletech/hive-operator/pull/695 [#702]: https://github.com/stackabletech/hive-operator/pull/702 +[#707]: https://github.com/stackabletech/hive-operator/pull/707 ## [26.3.0] - 2026-03-16