diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e08b815..982dc84b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,13 @@ All notable changes to this project will be documented in this file. This means you need to replace your simple database connection string with a typed struct. This struct is consistent between different CRDs, so that you can easily copy/paste it between stacklets. Read on the [Hive database documentation](https://docs.stackable.tech/home/nightly/hive/usage-guide/database-driver) for details ([#674]). +- Internal operator refactoring: introduce `dereference()` and `validate()` steps in the reconciler ([#707]). [#674]: https://github.com/stackabletech/hive-operator/pull/674 [#693]: https://github.com/stackabletech/hive-operator/pull/693 [#695]: https://github.com/stackabletech/hive-operator/pull/695 [#702]: https://github.com/stackabletech/hive-operator/pull/702 +[#707]: https://github.com/stackabletech/hive-operator/pull/707 ## [26.3.0] - 2026-03-16 diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index ade37ef0..678812c1 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -1,7 +1,9 @@ //! 
Ensures that `Pod`s are configured and running for each [`v1alpha1::HiveCluster`] +pub mod dereference; +pub mod validate; + use std::{ - borrow::Cow, collections::{BTreeMap, HashMap}, hash::Hasher, sync::Arc, @@ -35,8 +37,7 @@ use stackable_operator::{ cli::OperatorEnvironmentOptions, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, commons::{ - product_image_selection::{self, ResolvedProductImage}, - rbac::build_rbac_resources, + product_image_selection::ResolvedProductImage, rbac::build_rbac_resources, secret_class::SecretClassVolumeProvisionParts, }, constants::RESTART_CONTROLLER_ENABLED_LABEL, @@ -63,7 +64,6 @@ use stackable_operator::{ kvp::{Labels, ObjectLabels}, logging::controller::ReconcilerError, memory::{BinaryMultiple, MemoryQuantity}, - product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ self, framework::{ @@ -74,7 +74,7 @@ use stackable_operator::{ CustomContainerLogConfig, }, }, - role_utils::{GenericRoleConfig, RoleGroupRef}, + role_utils::RoleGroupRef, shared::time::Duration, status::condition::{ compute_conditions, operations::ClusterOperationsConditionBuilder, @@ -100,7 +100,7 @@ use crate::{ STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR_NAME, STACKABLE_LOG_DIR, STACKABLE_LOG_DIR_NAME, databases::{MetadataDatabaseConnection, derby_driver_class}, - v1alpha1::{self, HiveMetastoreRoleConfig}, + v1alpha1, }, discovery::{self}, kerberos::{ @@ -136,9 +136,6 @@ pub enum Error { #[snafu(display("object defines no namespace"))] ObjectHasNoNamespace, - #[snafu(display("object defines no metastore role"))] - NoMetaStoreRole, - #[snafu(display("failed to apply Service for {rolegroup}"))] ApplyRoleGroupService { source: stackable_operator::cluster_resources::Error, @@ -163,16 +160,6 @@ pub enum Error { rolegroup: RoleGroupRef, }, - #[snafu(display("failed to generate product config"))] - GenerateProductConfig { - source: 
stackable_operator::product_config_utils::Error, - }, - - #[snafu(display("invalid product config"))] - InvalidProductConfig { - source: stackable_operator::product_config_utils::Error, - }, - #[snafu(display("object is missing metadata to build owner reference"))] ObjectMissingMetadataForOwnerRef { source: stackable_operator::builder::meta::Error, @@ -201,9 +188,6 @@ pub enum Error { ))] S3TlsNoVerificationNotSupported, - #[snafu(display("failed to resolve and merge resource config for role and role group"))] - FailedToResolveResourceConfig { source: crate::crd::Error }, - #[snafu(display("failed to create hive container [{name}]"))] FailedToCreateHiveContainer { source: stackable_operator::builder::pod::container::Error, @@ -320,25 +304,18 @@ pub enum Error { #[snafu(display("failed to configure service"))] ServiceConfiguration { source: crate::service::Error }, - #[snafu(display("failed to resolve product image"))] - ResolveProductImage { - source: product_image_selection::Error, + #[snafu(display("failed to dereference cluster resources"))] + Dereference { + source: crate::controller::dereference::Error, }, - #[snafu(display("invalid OpaConfig"))] - InvalidOpaConfig { - source: stackable_operator::commons::opa::Error, - }, + #[snafu(display("failed to validate cluster configuration"))] + Validate { source: validate::Error }, #[snafu(display("failed to build TLS certificate SecretClass Volume"))] TlsCertSecretClassVolumeBuild { source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError, }, - - #[snafu(display("invalid metadata database connection"))] - InvalidMetadataDatabaseConnection { - source: stackable_operator::database_connections::Error, - }, } type Result = std::result::Result; @@ -361,69 +338,18 @@ pub async fn reconcile_hive( let client = &ctx.client; let hive_namespace = hive.namespace().context(ObjectHasNoNamespaceSnafu)?; - let resolved_product_image = hive - .spec - .image - .resolve( - CONTAINER_IMAGE_BASE_NAME, - 
&ctx.operator_environment.image_repository, - crate::built_info::PKG_VERSION, - ) - .context(ResolveProductImageSnafu)?; - let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; - let hive_role = HiveRole::MetaStore; + let dereferenced = crate::controller::dereference::dereference(client, hive) + .await + .context(DereferenceSnafu)?; - let s3_connection_spec: Option = - if let Some(s3) = &hive.spec.cluster_config.s3 { - Some( - s3.clone() - .resolve( - client, - &hive.namespace().ok_or(Error::ObjectHasNoNamespace)?, - ) - .await - .context(ConfigureS3ConnectionSnafu)?, - ) - } else { - None - }; - - let metadata_database_connection_details = hive - .spec - .cluster_config - .metadata_database - .jdbc_connection_details("METADATA") - .context(InvalidMetadataDatabaseConnectionSnafu)?; - - let validated_config = validate_all_roles_and_groups_config( - &resolved_product_image.product_version, - &transform_all_roles_to_config( - hive, - &[( - HiveRole::MetaStore.to_string(), - ( - vec![ - PropertyNameKind::Env, - PropertyNameKind::Cli, - PropertyNameKind::File(HIVE_SITE_XML.to_string()), - PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), - ], - role.clone(), - ), - )] - .into(), - ) - .context(GenerateProductConfigSnafu)?, + let validated = validate::validate_cluster( + hive, + CONTAINER_IMAGE_BASE_NAME, + &ctx.operator_environment.image_repository, + crate::built_info::PKG_VERSION, &ctx.product_config, - false, - false, ) - .context(InvalidProductConfigSnafu)?; - - let metastore_config = validated_config - .get(&HiveRole::MetaStore.to_string()) - .map(Cow::Borrowed) - .unwrap_or_default(); + .context(ValidateSnafu)?; let mut cluster_resources = ClusterResources::new( APP_NAME, @@ -454,112 +380,104 @@ pub async fn reconcile_hive( .await .context(ApplyRoleBindingSnafu)?; - let hive_opa_config = match hive.get_opa_config() { - Some(opa_config) => Some( - HiveOpaConfig::from_opa_config(client, hive, opa_config) - .await - 
.context(InvalidOpaConfigSnafu)?, - ), - None => None, - }; - let mut ss_cond_builder = StatefulSetConditionBuilder::default(); - for (rolegroup_name, rolegroup_config) in metastore_config.iter() { - let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); - - let config = hive - .merged_config(&HiveRole::MetaStore, &rolegroup) - .context(FailedToResolveResourceConfigSnafu)?; - - let rg_metrics_service = - build_rolegroup_metrics_service(hive, &resolved_product_image, &rolegroup) - .context(ServiceConfigurationSnafu)?; - - let rg_headless_service = - build_rolegroup_headless_service(hive, &resolved_product_image, &rolegroup) - .context(ServiceConfigurationSnafu)?; - - let rg_configmap = build_metastore_rolegroup_config_map( - hive, - &hive_namespace, - &resolved_product_image, - &rolegroup, - rolegroup_config, - &metadata_database_connection_details, - s3_connection_spec.as_ref(), - &config, - &client.kubernetes_cluster_info, - hive_opa_config.as_ref(), - )?; - let rg_statefulset = build_metastore_rolegroup_statefulset( - hive, - &hive_role, - &resolved_product_image, - &rolegroup, - rolegroup_config, - &metadata_database_connection_details, - s3_connection_spec.as_ref(), - &config, - &rbac_sa.name_any(), - hive_opa_config.as_ref(), - )?; - - cluster_resources - .add(client, rg_metrics_service) + for (hive_role, role_group_configs) in &validated.role_groups { + if let Some(role_config) = validated.role_configs.get(hive_role) { + add_pdbs( + &role_config.pdb, + hive, + hive_role, + client, + &mut cluster_resources, + ) .await - .context(ApplyRoleGroupServiceSnafu { - rolegroup: rolegroup.clone(), - })?; + .context(FailedToCreatePdbSnafu)?; + } - cluster_resources - .add(client, rg_headless_service) - .await - .context(ApplyRoleGroupServiceSnafu { - rolegroup: rolegroup.clone(), - })?; + for (rolegroup_name, validated_rg_config) in role_group_configs { + let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); + + let rg_metrics_service = + 
build_rolegroup_metrics_service(hive, &validated.image, &rolegroup) + .context(ServiceConfigurationSnafu)?; + + let rg_headless_service = + build_rolegroup_headless_service(hive, &validated.image, &rolegroup) + .context(ServiceConfigurationSnafu)?; + + let rg_configmap = build_metastore_rolegroup_config_map( + hive, + &hive_namespace, + &validated.image, + &rolegroup, + &validated_rg_config.product_config_properties, + &dereferenced.metadata_database_connection_details, + dereferenced.s3_connection_spec.as_ref(), + &validated_rg_config.merged_config, + &client.kubernetes_cluster_info, + dereferenced.hive_opa_config.as_ref(), + )?; + let rg_statefulset = build_metastore_rolegroup_statefulset( + hive, + hive_role, + &validated.image, + &rolegroup, + &validated_rg_config.product_config_properties, + &dereferenced.metadata_database_connection_details, + dereferenced.s3_connection_spec.as_ref(), + &validated_rg_config.merged_config, + &rbac_sa.name_any(), + dereferenced.hive_opa_config.as_ref(), + )?; - cluster_resources - .add(client, rg_configmap) - .await - .context(ApplyRoleGroupConfigSnafu { - rolegroup: rolegroup.clone(), - })?; - - // Note: The StatefulSet needs to be applied after all ConfigMaps and Secrets it mounts - // to prevent unnecessary Pod restarts. - // See https://github.com/stackabletech/commons-operator/issues/111 for details. - ss_cond_builder.add( cluster_resources - .add(client, rg_statefulset) + .add(client, rg_metrics_service) .await - .context(ApplyRoleGroupStatefulSetSnafu { + .context(ApplyRoleGroupServiceSnafu { rolegroup: rolegroup.clone(), - })?, - ); - } + })?; - let role_config = hive.role_config(&hive_role); - if let Some(HiveMetastoreRoleConfig { - common: GenericRoleConfig { - pod_disruption_budget: pdb, - }, - .. 
- }) = role_config - { - add_pdbs(pdb, hive, &hive_role, client, &mut cluster_resources) - .await - .context(FailedToCreatePdbSnafu)?; + cluster_resources + .add(client, rg_headless_service) + .await + .context(ApplyRoleGroupServiceSnafu { + rolegroup: rolegroup.clone(), + })?; + + cluster_resources.add(client, rg_configmap).await.context( + ApplyRoleGroupConfigSnafu { + rolegroup: rolegroup.clone(), + }, + )?; + + // Note: The StatefulSet needs to be applied after all ConfigMaps and Secrets it mounts + // to prevent unnecessary Pod restarts. + // See https://github.com/stackabletech/commons-operator/issues/111 for details. + ss_cond_builder.add( + cluster_resources + .add(client, rg_statefulset) + .await + .context(ApplyRoleGroupStatefulSetSnafu { + rolegroup: rolegroup.clone(), + })?, + ); + } } // std's SipHasher is deprecated, and DefaultHasher is unstable across Rust releases. // We don't /need/ stability, but it's still nice to avoid spurious changes where possible. let mut discovery_hash = FnvHasher::with_key(0); - if let Some(HiveMetastoreRoleConfig { listener_class, .. 
}) = role_config { - let role_listener: Listener = - build_role_listener(hive, &resolved_product_image, &hive_role, listener_class) - .context(ListenerConfigurationSnafu)?; + let hive_role = HiveRole::MetaStore; + if let Some(role_config) = validated.role_configs.get(&hive_role) { + let role_listener: Listener = build_role_listener( + hive, + &validated.image, + &hive_role, + &role_config.listener_class, + ) + .context(ListenerConfigurationSnafu)?; let listener = cluster_resources.add(client, role_listener).await.context( ApplyGroupListenerSnafu { role: hive_role.to_string(), @@ -570,7 +488,7 @@ pub async fn reconcile_hive( hive, hive, hive_role, - &resolved_product_image, + &validated.image, None, listener, ) diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs new file mode 100644 index 00000000..77e331aa --- /dev/null +++ b/rust/operator-binary/src/controller/dereference.rs @@ -0,0 +1,76 @@ +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + crd::s3, database_connections::drivers::jdbc::JdbcDatabaseConnectionDetails, kube::ResourceExt, +}; + +use crate::{config::opa::HiveOpaConfig, crd::v1alpha1}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("object defines no namespace"))] + ObjectHasNoNamespace, + + #[snafu(display("failed to configure S3 connection"))] + ConfigureS3Connection { + source: s3::v1alpha1::ConnectionError, + }, + + #[snafu(display("invalid metadata database connection"))] + InvalidMetadataDatabaseConnection { + source: stackable_operator::database_connections::Error, + }, + + #[snafu(display("invalid OPA configuration"))] + InvalidOpaConfig { + source: stackable_operator::commons::opa::Error, + }, +} + +/// External references resolved during the dereference step. 
+pub struct DereferencedObjects { + pub s3_connection_spec: Option, + pub metadata_database_connection_details: JdbcDatabaseConnectionDetails, + pub hive_opa_config: Option, +} + +pub async fn dereference( + client: &stackable_operator::client::Client, + hive: &v1alpha1::HiveCluster, +) -> Result { + let s3_connection_spec: Option = + if let Some(s3) = &hive.spec.cluster_config.s3 { + Some( + s3.clone() + .resolve( + client, + &hive.namespace().ok_or(Error::ObjectHasNoNamespace)?, + ) + .await + .context(ConfigureS3ConnectionSnafu)?, + ) + } else { + None + }; + + let metadata_database_connection_details = hive + .spec + .cluster_config + .metadata_database + .jdbc_connection_details("METADATA") + .context(InvalidMetadataDatabaseConnectionSnafu)?; + + let hive_opa_config = match hive.get_opa_config() { + Some(opa_config) => Some( + HiveOpaConfig::from_opa_config(client, hive, opa_config) + .await + .context(InvalidOpaConfigSnafu)?, + ), + None => None, + }; + + Ok(DereferencedObjects { + s3_connection_spec, + metadata_database_connection_details, + hive_opa_config, + }) +} diff --git a/rust/operator-binary/src/controller/validate.rs b/rust/operator-binary/src/controller/validate.rs new file mode 100644 index 00000000..728fce44 --- /dev/null +++ b/rust/operator-binary/src/controller/validate.rs @@ -0,0 +1,156 @@ +use std::{ + borrow::Cow, + collections::{BTreeMap, HashMap}, +}; + +use product_config::{ProductConfigManager, types::PropertyNameKind}; +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + commons::product_image_selection::{self, ResolvedProductImage}, + product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, + role_utils::GenericRoleConfig, +}; + +use crate::crd::{ + HIVE_SITE_XML, HiveRole, JVM_SECURITY_PROPERTIES_FILE, MetaStoreConfig, + v1alpha1::{self, HiveMetastoreRoleConfig}, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("failed to resolve product image"))] + 
ResolveProductImage { + source: product_image_selection::Error, + }, + + #[snafu(display("object defines no metastore role"))] + NoMetaStoreRole, + + #[snafu(display("failed to generate product config"))] + GenerateProductConfig { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("invalid product config"))] + InvalidProductConfig { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("failed to resolve and merge config for role and role group"))] + FailedToResolveConfig { source: crate::crd::Error }, +} + +/// Per-role configuration extracted during validation. +#[derive(Clone, Debug)] +pub struct ValidatedRoleConfig { + pub pdb: stackable_operator::commons::pdb::PdbConfig, + pub listener_class: String, +} + +/// Per-rolegroup configuration: the merged CRD config plus the product-config properties. +#[derive(Clone, Debug)] +pub struct ValidatedRoleGroupConfig { + pub merged_config: MetaStoreConfig, + pub product_config_properties: HashMap>, +} + +/// The validated cluster: proves that product-config validation and config merging +/// succeeded for every role and role group before any resources are created. 
+#[derive(Clone, Debug)] +pub struct ValidatedHiveCluster { + pub image: ResolvedProductImage, + pub role_groups: BTreeMap>, + pub role_configs: BTreeMap, +} + +pub fn validate_cluster( + hive: &v1alpha1::HiveCluster, + image_base_name: &str, + image_repository: &str, + pkg_version: &str, + product_config_manager: &ProductConfigManager, +) -> Result { + let resolved_product_image = hive + .spec + .image + .resolve(image_base_name, image_repository, pkg_version) + .context(ResolveProductImageSnafu)?; + + let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; + + let validated_config = validate_all_roles_and_groups_config( + &resolved_product_image.product_version, + &transform_all_roles_to_config( + hive, + &[( + HiveRole::MetaStore.to_string(), + ( + vec![ + PropertyNameKind::Env, + PropertyNameKind::Cli, + PropertyNameKind::File(HIVE_SITE_XML.to_string()), + PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), + ], + role.clone(), + ), + )] + .into(), + ) + .context(GenerateProductConfigSnafu)?, + product_config_manager, + false, + false, + ) + .context(InvalidProductConfigSnafu)?; + + let mut role_groups = BTreeMap::new(); + let mut role_configs = BTreeMap::new(); + + let metastore_config = validated_config + .get(&HiveRole::MetaStore.to_string()) + .map(Cow::Borrowed) + .unwrap_or_default(); + + let hive_role = HiveRole::MetaStore; + + if let Some(HiveMetastoreRoleConfig { + common: GenericRoleConfig { + pod_disruption_budget: pdb, + }, + listener_class, + }) = hive.role_config(&hive_role) + { + role_configs.insert( + hive_role.clone(), + ValidatedRoleConfig { + pdb: pdb.clone(), + listener_class: listener_class.clone(), + }, + ); + } + + let mut group_configs = BTreeMap::new(); + for (rolegroup_name, rolegroup_config) in metastore_config.iter() { + let rolegroup = hive.metastore_rolegroup_ref(rolegroup_name); + + let merged_config = hive + .merged_config(&hive_role, &rolegroup) + .context(FailedToResolveConfigSnafu)?; + + 
group_configs.insert( + rolegroup_name.clone(), + ValidatedRoleGroupConfig { + merged_config, + product_config_properties: rolegroup_config.clone(), + }, + ); + } + + role_groups.insert(hive_role, group_configs); + + Ok(ValidatedHiveCluster { + image: resolved_product_image, + role_groups, + role_configs, + }) +} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 7e4edc63..2b046f18 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -383,7 +383,7 @@ pub struct HdfsConnection { pub config_map: String, } -#[derive(Display, EnumString, EnumIter)] +#[derive(Clone, Debug, Display, EnumString, EnumIter, Eq, Hash, Ord, PartialEq, PartialOrd)] #[strum(serialize_all = "camelCase")] pub enum HiveRole { #[strum(serialize = "metastore")]