diff --git a/CHANGELOG.md b/CHANGELOG.md index 834b5030..28b8cc30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ - test: ZooKeeper 3.9.2 removed ([#654]). - test: Remove HDFS `3.3.4`, `3.3.6`, and `3.4.0` ([#655]). - test: HBase 2.4.18 removed ([#659]): +- Remove operator support for HBase 2.4 including the JMX exporter ([#672]). [#640]: https://github.com/stackabletech/hbase-operator/pull/640 [#645]: https://github.com/stackabletech/hbase-operator/pull/645 @@ -49,6 +50,7 @@ [#659]: https://github.com/stackabletech/hbase-operator/pull/659 [#660]: https://github.com/stackabletech/hbase-operator/pull/660 [#661]: https://github.com/stackabletech/hbase-operator/pull/661 +[#672]: https://github.com/stackabletech/hbase-operator/pull/672 ## [25.3.0] - 2025-03-21 diff --git a/docs/modules/hbase/pages/usage-guide/monitoring.adoc b/docs/modules/hbase/pages/usage-guide/monitoring.adoc index c9c06eec..2c9b7570 100644 --- a/docs/modules/hbase/pages/usage-guide/monitoring.adoc +++ b/docs/modules/hbase/pages/usage-guide/monitoring.adoc @@ -8,4 +8,3 @@ Starting with HBase 2.6 the URL for Prometheus metrics has changed. This is because HBase offers now a built-in endpoint for this purpose. This endpoint is available from the UI service. For example, in the case of the master service, the URL is `http://:16010/prometheus`. -The old URL `http://:9100` is still available for HBase 2.4. diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 4737ecc4..effd6792 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -5,8 +5,7 @@ use stackable_operator::{ }; use crate::crd::{ - AnyServiceConfig, CONFIG_DIR_NAME, HbaseRole, JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, - v1alpha1, + AnyServiceConfig, CONFIG_DIR_NAME, HbaseRole, JVM_SECURITY_PROPERTIES_FILE, v1alpha1, }; const JAVA_HEAP_FACTOR: f32 = 0.8; @@ -53,18 +52,11 @@ pub fn construct_role_specific_non_heap_jvm_args( hbase: &v1alpha1::HbaseCluster, hbase_role: &HbaseRole, role_group: &str, - product_version: &str, ) -> Result { let mut jvm_args = vec![format!( "-Djava.security.properties={CONFIG_DIR_NAME}/{JVM_SECURITY_PROPERTIES_FILE}" )]; - // Starting with HBase 2.6 the JVM exporter is not needed anymore - if product_version.starts_with("2.4") || product_version.starts_with("2.5") { - jvm_args.push( - format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/{hbase_role}.yaml") - ); - } if hbase.has_kerberos_enabled() { jvm_args.push("-Djava.security.krb5.conf=/stackable/kerberos/krb5.conf".to_owned()); } @@ -168,17 +160,11 @@ mod tests { default: replicas: 1 "#; - let (hbase, hbase_role, merged_config, role_group, product_version) = - construct_boilerplate(input); + let (hbase, hbase_role, merged_config, role_group) = construct_boilerplate(input); let global_jvm_args = construct_global_jvm_args(false); - let role_specific_non_heap_jvm_args = construct_role_specific_non_heap_jvm_args( - &hbase, - &hbase_role, - &role_group, - &product_version, - ) - .unwrap(); + let role_specific_non_heap_jvm_args = + construct_role_specific_non_heap_jvm_args(&hbase, &hbase_role, &role_group).unwrap(); let hbase_heapsize_env = construct_hbase_heapsize_env(&merged_config).unwrap(); assert_eq!(global_jvm_args, ""); @@ -230,17 +216,11 @@ mod tests { - -Xmx40000m # This has no effect! - -Dhttps.proxyPort=1234 "#; - let (hbase, hbase_role, merged_config, role_group, product_version) = - construct_boilerplate(input); + let (hbase, hbase_role, merged_config, role_group) = construct_boilerplate(input); let global_jvm_args = construct_global_jvm_args(hbase.has_kerberos_enabled()); - let role_specific_non_heap_jvm_args = construct_role_specific_non_heap_jvm_args( - &hbase, - &hbase_role, - &role_group, - &product_version, - ) - .unwrap(); + let role_specific_non_heap_jvm_args = + construct_role_specific_non_heap_jvm_args(&hbase, &hbase_role, &role_group).unwrap(); let hbase_heapsize_env = construct_hbase_heapsize_env(&merged_config).unwrap(); assert_eq!( @@ -260,13 +240,7 @@ mod tests { fn construct_boilerplate( hbase_cluster: &str, - ) -> ( - v1alpha1::HbaseCluster, - HbaseRole, - AnyServiceConfig, - String, - String, - ) { + ) -> (v1alpha1::HbaseCluster, HbaseRole, AnyServiceConfig, String) { let hbase: v1alpha1::HbaseCluster = serde_yaml::from_str(hbase_cluster).expect("illegal test input"); @@ -274,14 +248,7 @@ mod tests { let merged_config = hbase .merged_config(&hbase_role, "default", "my-hdfs") .unwrap(); - let product_version = hbase.spec.image.product_version().to_owned(); - ( - hbase, - hbase_role, - merged_config, - "default".to_owned(), - product_version, - ) + (hbase, hbase_role, merged_config, "default".to_owned()) } } diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index dd878acf..b3483f3c 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -66,7 +66,6 @@ pub const HBASE_UI_PORT_NAME_HTTP: &str = "ui-http"; pub const HBASE_UI_PORT_NAME_HTTPS: &str = "ui-https"; pub const HBASE_REST_PORT_NAME_HTTP: &str = "rest-http"; pub const HBASE_REST_PORT_NAME_HTTPS: &str = "rest-https"; -pub const METRICS_PORT_NAME: &str = "metrics"; pub const HBASE_MASTER_PORT: u16 = 16000; // HBase always uses 16010, regardless of http or https. On 2024-01-17 we decided in Arch-meeting that we want to stick @@ -77,9 +76,6 @@ pub const HBASE_REGIONSERVER_PORT: u16 = 16020; pub const HBASE_REGIONSERVER_UI_PORT: u16 = 16030; pub const HBASE_REST_PORT: u16 = 8080; pub const HBASE_REST_UI_PORT: u16 = 8085; -// This port is only used by Hbase prior to version 2.6 with a third-party JMX exporter. -// Newer versions use the same port as the UI because Hbase provides it's own metrics API -pub const METRICS_PORT: u16 = 9100; const DEFAULT_REGION_MOVER_TIMEOUT: Duration = Duration::from_minutes_unchecked(59); const DEFAULT_REGION_MOVER_DELTA_TO_SHUTDOWN: Duration = Duration::from_minutes_unchecked(1); @@ -494,11 +490,10 @@ impl v1alpha1::HbaseCluster { } /// Returns required port name and port number tuples depending on the role. - /// Hbase versions 2.4.* will have three ports for each role /// Hbase versions 2.6.* will have two ports for each role. The metrics are available over the /// UI port. - pub fn ports(&self, role: &HbaseRole, hbase_version: &str) -> Vec<(String, u16)> { - let result_without_metric_port: Vec<(String, u16)> = match role { + pub fn ports(&self, role: &HbaseRole) -> Vec<(String, u16)> { + match role { HbaseRole::Master => vec![ ("master".to_string(), HBASE_MASTER_PORT), (self.ui_port_name(), HBASE_MASTER_UI_PORT), @@ -519,14 +514,6 @@ impl v1alpha1::HbaseCluster { ), (self.ui_port_name(), HBASE_REST_UI_PORT), ], - }; - if hbase_version.starts_with(r"2.4") { - result_without_metric_port - .into_iter() - .chain(vec![(METRICS_PORT_NAME.to_string(), METRICS_PORT)]) - .collect() - } else { - result_without_metric_port } } @@ -956,7 +943,7 @@ impl Configuration for HbaseConfigFragment { HBASE_ENV_SH => { // The contents of this file cannot be built entirely here because we don't have // access to the clusterConfig or product version. - // These are needed to set up Kerberos and JMX exporter settings. + // These are needed to set up Kerberos. // To avoid fragmentation of the code needed to build this file, we moved the // implementation to the hbase_controller::build_hbase_env_sh() function. } @@ -1107,7 +1094,7 @@ impl Configuration for RegionServerConfigFragment { HBASE_ENV_SH => { // The contents of this file cannot be built entirely here because we don't have // access to the clusterConfig or product version. - // These are needed to set up Kerberos and JMX exporter settings. + // These are needed to set up Kerberos. // To avoid fragmentation of the code needed to build this file, we moved the // implementation to the hbase_controller::build_hbase_env_sh() function. } diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 91554814..33758dd1 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -336,8 +336,6 @@ pub async fn reconcile_hbase( let client = &ctx.client; - validate_cr(hbase)?; - let resolved_product_image = hbase .spec .image @@ -515,7 +513,7 @@ pub fn build_region_server_role_service( .server_role_service_name() .context(GlobalServiceNameNotFoundSnafu)?; let ports = hbase - .ports(&role, &resolved_product_image.product_version) + .ports(&role) .into_iter() .map(|(name, value)| ServicePort { name: Some(name), @@ -601,13 +599,8 @@ fn build_rolegroup_config_map( ); } PropertyNameKind::File(file_name) if file_name == HBASE_ENV_SH => { - let mut hbase_env_config = build_hbase_env_sh( - hbase, - merged_config, - &hbase_role, - &rolegroup.role_group, - &resolved_product_image.product_version, - )?; + let mut hbase_env_config = + build_hbase_env_sh(hbase, merged_config, &hbase_role, &rolegroup.role_group)?; // configOverride come last hbase_env_config.extend(config.clone()); @@ -691,15 +684,11 @@ fn build_rolegroup_config_map( builder.add_data(SSL_CLIENT_XML, ssl_client_xml); } - extend_role_group_config_map( - rolegroup, - merged_config.logging(), - &mut builder, - &resolved_product_image.product_version, - ) - .context(InvalidLoggingConfigSnafu { - cm_name: rolegroup.object_name(), - })?; + extend_role_group_config_map(rolegroup, merged_config.logging(), &mut builder).context( + InvalidLoggingConfigSnafu { + cm_name: rolegroup.object_name(), + }, + )?; builder.build().map_err(|e| Error::BuildRoleGroupConfig { source: e, @@ -717,7 +706,7 @@ fn build_rolegroup_service( resolved_product_image: &ResolvedProductImage, ) -> Result { let ports = hbase - .ports(hbase_role, &resolved_product_image.product_version) + .ports(hbase_role) .into_iter() .map(|(name, value)| ServicePort { name: Some(name), @@ -783,7 +772,7 @@ fn build_rolegroup_statefulset( let hbase_version = &resolved_product_image.app_version_label; let ports = hbase - .ports(hbase_role, &resolved_product_image.product_version) + .ports(hbase_role) .into_iter() .map(|(name, value)| ContainerPort { name: Some(name), @@ -1099,7 +1088,6 @@ fn build_hbase_env_sh( merged_config: &AnyServiceConfig, hbase_role: &HbaseRole, role_group: &str, - product_version: &str, ) -> Result, Error> { let mut result = BTreeMap::new(); @@ -1114,7 +1102,7 @@ fn build_hbase_env_sh( construct_global_jvm_args(hbase.has_kerberos_enabled()), ); let role_specific_non_heap_jvm_args = - construct_role_specific_non_heap_jvm_args(hbase, hbase_role, role_group, product_version) + construct_role_specific_non_heap_jvm_args(hbase, hbase_role, role_group) .context(ConstructJvmArgumentSnafu)?; match hbase_role { HbaseRole::Master => { @@ -1140,24 +1128,6 @@ fn build_hbase_env_sh( Ok(result) } -/// Ensures that no authorization is configured for HBase versions that do not support it. -/// In the future, such validations should be moved to the CRD CEL rules which are much more flexible -/// and have to added benefit that invalid CRs are rejected by the API server. -/// A requirement for this is that the minimum supported Kubernetes version is 1.29. -fn validate_cr(hbase: &v1alpha1::HbaseCluster) -> Result<()> { - tracing::info!("Begin CR validation"); - - let hbase_version = hbase.spec.image.product_version(); - let authorization = hbase.spec.cluster_config.authorization.is_some(); - - if hbase_version.starts_with("2.4") && authorization { - tracing::error!("Invalid custom resource"); - return Err(Error::AuthorizationNotSupported); - } - tracing::info!("End CR validation"); - Ok(()) -} - /// Build the domain name of an HBase service pod. /// The hbase-entrypoint.sh script uses this to build the fully qualified name of a pod /// by appending it to the `HOSTNAME` environment variable. diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 072493d6..61db3245 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -43,9 +43,7 @@ pub enum Error { type Result = std::result::Result; const CONSOLE_CONVERSION_PATTERN: &str = "%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n"; -const HBASE_LOG4J_FILE: &str = "hbase.log4j.xml"; const HBASE_LOG4J2_FILE: &str = "hbase.log4j2.xml"; -pub const LOG4J_CONFIG_FILE: &str = "log4j.properties"; pub const LOG4J2_CONFIG_FILE: &str = "log4j2.properties"; pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; pub static CONTAINERDEBUG_LOG_DIRECTORY: std::sync::LazyLock = @@ -56,16 +54,12 @@ pub fn extend_role_group_config_map( rolegroup: &RoleGroupRef, logging: &Logging, cm_builder: &mut ConfigMapBuilder, - hbase_version: &str, ) -> Result<()> { if let Some(ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), }) = logging.containers.get(&Container::Hbase) { - cm_builder.add_data( - log4j_properties_file_name(hbase_version), - log4j_config(hbase_version, log_config), - ); + cm_builder.add_data(LOG4J2_CONFIG_FILE, log4j_config(log_config)); } let vector_log_config = if let Some(ContainerLogConfig { @@ -87,41 +81,15 @@ pub fn extend_role_group_config_map( Ok(()) } -pub fn log4j_properties_file_name(hbase_version: &str) -> &'static str { - if needs_log4j2(hbase_version) { - LOG4J2_CONFIG_FILE - } else { - LOG4J_CONFIG_FILE - } -} - -fn log4j_config(hbase_version: &str, log_config: &AutomaticContainerLogConfig) -> String { - if needs_log4j2(hbase_version) { - product_logging::framework::create_log4j2_config( - &format!("{STACKABLE_LOG_DIR}/hbase"), - HBASE_LOG4J2_FILE, - MAX_HBASE_LOG_FILES_SIZE - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32, - CONSOLE_CONVERSION_PATTERN, - log_config, - ) - } else { - product_logging::framework::create_log4j_config( - &format!("{STACKABLE_LOG_DIR}/hbase"), - HBASE_LOG4J_FILE, - MAX_HBASE_LOG_FILES_SIZE - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32, - CONSOLE_CONVERSION_PATTERN, - log_config, - ) - } -} - -// HBase 2.6 moved from log4j to log4j2 -fn needs_log4j2(hbase_version: &str) -> bool { - !hbase_version.starts_with(r"2.4") +fn log4j_config(log_config: &AutomaticContainerLogConfig) -> String { + product_logging::framework::create_log4j2_config( + &format!("{STACKABLE_LOG_DIR}/hbase"), + HBASE_LOG4J2_FILE, + MAX_HBASE_LOG_FILES_SIZE + .scale_to(BinaryMultiple::Mebi) + .floor() + .value as u32, + CONSOLE_CONVERSION_PATTERN, + log_config, + ) } diff --git a/tests/templates/kuttl/opa/30-install-hbase.yaml.j2 b/tests/templates/kuttl/opa/30-install-hbase.yaml.j2 index 92fda28c..fea35545 100644 --- a/tests/templates/kuttl/opa/30-install-hbase.yaml.j2 +++ b/tests/templates/kuttl/opa/30-install-hbase.yaml.j2 @@ -28,12 +28,6 @@ commands: appender.console.name = STDOUT appender.console.layout.type = PatternLayout appender.console.layout.pattern = [%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %c{1} - %msg%n - log4j.properties: | - # Used by HBase 2.4 - log4j.rootLogger=INFO, FILE - log4j.appender.FILE=org.apache.log4j.FileAppender - log4j.appender.FILE.File=/stackable/log/hbase/hbase.log4j.xml - log4j.appender.FILE.layout=org.apache.log4j.xml.XMLLayout --- apiVersion: hbase.stackable.tech/v1alpha1 kind: HbaseCluster diff --git a/tests/templates/kuttl/smoke/50-assert.yaml b/tests/templates/kuttl/smoke/50-assert.yaml index b7f37813..5ae54c4c 100644 --- a/tests/templates/kuttl/smoke/50-assert.yaml +++ b/tests/templates/kuttl/smoke/50-assert.yaml @@ -5,4 +5,5 @@ metadata: name: test-hbase commands: - script: kubectl exec --namespace=$NAMESPACE hbase-test-runner-0 -- python /tmp/test-hbase.py http://test-hbase-restserver-default:8080 + - script: kubectl exec --namespace=$NAMESPACE hbase-test-runner-0 -- python /tmp/test_prometheus_metrics.py $NAMESPACE timeout: 240 diff --git a/tests/templates/kuttl/smoke/50-test-hbase.yaml b/tests/templates/kuttl/smoke/50-test-hbase.yaml index 6863fc61..d7b83feb 100644 --- a/tests/templates/kuttl/smoke/50-test-hbase.yaml +++ b/tests/templates/kuttl/smoke/50-test-hbase.yaml @@ -3,3 +3,4 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep commands: - script: kubectl cp --namespace=$NAMESPACE ./test-hbase.py hbase-test-runner-0:/tmp + - script: kubectl cp --namespace=$NAMESPACE ./test_prometheus_metrics.py hbase-test-runner-0:/tmp diff --git a/tests/templates/kuttl/smoke/test_prometheus_metrics.py b/tests/templates/kuttl/smoke/test_prometheus_metrics.py new file mode 100644 index 00000000..7e8c36ab --- /dev/null +++ b/tests/templates/kuttl/smoke/test_prometheus_metrics.py @@ -0,0 +1,66 @@ +# Fetch metrics from the built-in Prometheus endpoint of HDFS components. + +import logging +import sys + +import requests + + +def check_metrics( + namespace: str, role: str, port: int, expected_metrics: list[str] +) -> None: + response: requests.Response = requests.get( + f"http://test-hbase-{role}-default-0.test-hbase-{role}-default.{namespace}.svc.cluster.local:{port}/prometheus", + timeout=10, + ) + assert response.ok, "Requesting metrics failed" + + # Split the response into lines to check for metric names at the beginning of each line. + # This is a bit slower than using a regex but it allows to use special characters like "{}" in metric names + # without needing to escape them. + response_lines = response.text.splitlines() + for metric in expected_metrics: + # Use any() with a generator to stop early if the metric is found. + assert any((line.startswith(metric) for line in response_lines)) is True, ( + f"Metric '{metric}' not found for {role}" + ) + + +def check_master_metrics( + namespace: str, +) -> None: + expected_metrics: list[str] = ["master_queue_size"] + + check_metrics(namespace, "master", 16010, expected_metrics) + + +def check_regionserver_metrics( + namespace: str, +) -> None: + expected_metrics: list[str] = ["region_server_queue_size"] + + check_metrics(namespace, "regionserver", 16030, expected_metrics) + + +def check_restserver_metrics( + namespace: str, +) -> None: + expected_metrics: list[str] = ["ugi_metrics_get_groups_num_ops"] + + check_metrics(namespace, "restserver", 8085, expected_metrics) + + +if __name__ == "__main__": + namespace_arg: str = sys.argv[1] + + logging.basicConfig( + level="DEBUG", + format="%(asctime)s %(levelname)s: %(message)s", + stream=sys.stdout, + ) + + check_master_metrics(namespace_arg) + check_regionserver_metrics(namespace_arg) + check_restserver_metrics(namespace_arg) + + print("All expected metrics found")