From 2d188c5c78f01712ae504e70b54b3e8d76c77e88 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 5 May 2025 17:03:37 +0200 Subject: [PATCH 1/6] chore(spark): update container images for 25.7.0 --- CHANGELOG.md | 2 ++ spark-k8s/versions.py | 30 ++++++------------------------ 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c45bb7a26..e9db2c289 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ All notable changes to this project will be documented in this file. - ci: Remove Nexus steps from build, mirror and release workflows ([#1056]). Also remove the old release workflow. +- spark: remove 3.5.2 and update dependencies for 3.5.5 ([#1094]) [#1025]: https://github.com/stackabletech/docker-images/pull/1025 [#1027]: https://github.com/stackabletech/docker-images/pull/1027 @@ -76,6 +77,7 @@ All notable changes to this project will be documented in this file. [#1054]: https://github.com/stackabletech/docker-images/pull/1054 [#1055]: https://github.com/stackabletech/docker-images/pull/1055 [#1056]: https://github.com/stackabletech/docker-images/pull/1056 +[#1094]: https://github.com/stackabletech/docker-images/pull/1094 ## [25.3.0] - 2025-03-21 diff --git a/spark-k8s/versions.py b/spark-k8s/versions.py index d00619a16..c878047ee 100644 --- a/spark-k8s/versions.py +++ b/spark-k8s/versions.py @@ -1,37 +1,19 @@ versions = [ - { - "product": "3.5.2", - "java-base": "17", - "java-devel": "17", - "python": "3.11", - "hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements - "hbase": "2.6.1", # current Stackable LTS version - "aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 - "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6 - "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 - "jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.1 - "stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 - "woodstox_core": "6.5.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 - "vector": "0.43.1", - "jmx_exporter": "1.1.0", - "tini": "0.19.0", - "hbase_connector": "1.0.1", - }, { "product": "3.5.5", "java-base": "17", "java-devel": "17", "python": "3.11", - "hadoop": "3.3.6", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements - "hbase": "2.6.1", # current Stackable LTS version - "aws_java_sdk_bundle": "1.12.367", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 - "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.3.6 + "hadoop": "3.4.1", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements + "hbase": "2.6.2", # current Stackable LTS version + "aws_java_sdk_bundle": "1.12.720", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.1 + "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.1 "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 "jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.2 "stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 "woodstox_core": "6.5.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 - "vector": "0.43.1", - "jmx_exporter": "1.1.0", + "vector": "0.46.1", + "jmx_exporter": "1.2.0", "tini": "0.19.0", "hbase_connector": "1.0.1", }, From ebbca87814735747ba4cf367b94092704b31836a Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 6 May 2025 10:39:12 +0200 Subject: [PATCH 2/6] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d19ae8e0..3b375c8b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ All notable changes to this project will be documented in this file. - yq: Bump products to use `4.45.2` ([#1090]). - cyclonedx-bom: Bump airflow and superset to use `6.0.0` ([#1090]). - vector: Bump to `0.46.1` ([#1098]). +- spark: update dependencies for 3.5.5 ([#1094]) ### Fixed @@ -66,7 +67,7 @@ All notable changes to this project will be documented in this file. Also remove the old release workflow. - zookeeper: Remove 3.9.2 ([#1093]). - Remove ubi8-rust-builder image ([#1091]). -- spark: remove 3.5.2 and update dependencies for 3.5.5 ([#1094]) +- spark: remove 3.5.2 ([#1094]) [#1025]: https://github.com/stackabletech/docker-images/pull/1025 [#1027]: https://github.com/stackabletech/docker-images/pull/1027 From 0b6372743effabaedd8df3c6b46e5a4e7e921e0d Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 6 May 2025 10:39:32 +0200 Subject: [PATCH 3/6] remove 3.5.2 patches --- .../3.5.2/0001-Update-CycloneDX-plugin.patch | 40 ------------------- .../stackable/patches/3.5.2/patchable.toml | 2 - 2 files changed, 42 deletions(-) delete mode 100644 spark-k8s/stackable/patches/3.5.2/0001-Update-CycloneDX-plugin.patch delete mode 100644 spark-k8s/stackable/patches/3.5.2/patchable.toml diff --git a/spark-k8s/stackable/patches/3.5.2/0001-Update-CycloneDX-plugin.patch b/spark-k8s/stackable/patches/3.5.2/0001-Update-CycloneDX-plugin.patch deleted file mode 100644 index 079885f0b..000000000 --- a/spark-k8s/stackable/patches/3.5.2/0001-Update-CycloneDX-plugin.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 2f95ba96e5894cfd07eca25aef5968e6a6d543fd Mon Sep 17 00:00:00 2001 -From: Lukas Voetmand -Date: Fri, 6 Sep 2024 17:53:52 +0200 -Subject: Update CycloneDX plugin - ---- - dev/make-distribution.sh | 1 - - pom.xml | 7 ++++++- - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh -index ef7c010e930..0f4c1c74e40 100755 ---- a/dev/make-distribution.sh -+++ b/dev/make-distribution.sh -@@ -171,7 +171,6 @@ BUILD_COMMAND=("$MVN" clean package \ - -Dmaven.javadoc.skip=true \ - -Dmaven.scaladoc.skip=true \ - -Dmaven.source.skip \ -- -Dcyclonedx.skip=true \ - $@) - - # Actually build the jar -diff --git a/pom.xml b/pom.xml -index 8fe98c35846..36a800162da 100644 ---- a/pom.xml -+++ b/pom.xml -@@ -3513,7 +3513,12 @@ - - org.cyclonedx - cyclonedx-maven-plugin -- 2.7.9 -+ 2.8.0 -+ -+ application -+ 1.5 -+ false -+ - - - package diff --git a/spark-k8s/stackable/patches/3.5.2/patchable.toml b/spark-k8s/stackable/patches/3.5.2/patchable.toml deleted file mode 100644 index a8a860258..000000000 --- a/spark-k8s/stackable/patches/3.5.2/patchable.toml +++ /dev/null @@ -1,2 +0,0 @@ -upstream = "https://github.com/apache/spark.git" -base = "bb7846dd487f259994fdc69e18e03382e3f64f42" From 36c90241d2becbb5a5cf09e35cd956bfec8135b4 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 6 May 2025 10:44:05 +0200 Subject: [PATCH 4/6] explain the purpose of some args in versions.py --- spark-k8s/versions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark-k8s/versions.py b/spark-k8s/versions.py index c878047ee..65c4db53e 100644 --- a/spark-k8s/versions.py +++ b/spark-k8s/versions.py @@ -4,8 +4,8 @@ "java-base": "17", "java-devel": "17", "python": "3.11", - "hadoop": "3.4.1", # Hadoop version defined in ../hbase/versions.py to reduce build time and disk requirements - "hbase": "2.6.2", # current Stackable LTS version + "hadoop": "3.4.1", # Current Stackable LTS version. Source of the AWS and Azure artifacts to Spark's classpath. + "hbase": "2.6.2", # Current Stackable LTS version. Used to build the HBase connector. "aws_java_sdk_bundle": "1.12.720", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.1 "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.1 "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 From 689ba8281bd6ebd4b992636ebf002de03072d3a5 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 7 May 2025 15:28:45 +0200 Subject: [PATCH 5/6] fix: aws artifact name and version --- spark-k8s/Dockerfile | 2 +- spark-k8s/versions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile index 4f20dc1bb..890fa1846 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile @@ -153,7 +153,7 @@ WORKDIR /stackable/spark-${PRODUCT}/dist/jars # Copy modules required for s3a:// COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE}.jar \ ./ # Copy modules required for abfs:// diff --git a/spark-k8s/versions.py b/spark-k8s/versions.py index 65c4db53e..851587858 100644 --- a/spark-k8s/versions.py +++ b/spark-k8s/versions.py @@ -6,7 +6,7 @@ "python": "3.11", "hadoop": "3.4.1", # Current Stackable LTS version. Source of the AWS and Azure artifacts to Spark's classpath. "hbase": "2.6.2", # Current Stackable LTS version. Used to build the HBase connector. - "aws_java_sdk_bundle": "1.12.720", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.1 + "aws_java_sdk_bundle": "2.24.6", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.1 "azure_storage": "7.0.1", # https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.1 "azure_keyvault_core": "1.0.0", # https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 "jackson_dataformat_xml": "2.15.2", # https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/3.5.2 From 6c4d120a23594160bdf5736cdcc3832f84ef84ff Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 16 May 2025 13:57:55 +0200 Subject: [PATCH 6/6] remove changelog entry --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9abfddb20..4e9640cdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,7 +55,6 @@ All notable changes to this project will be documented in this file. - cyclonedx-bom: Bump airflow and superset to use `6.0.0` ([#1090]). - vector: Bump to `0.46.1` ([#1098]). - spark: update dependencies for 3.5.5 ([#1094]) -- Changed default user & group IDs from 1000/1000 to 782252253/574654813 ([#916]) ### Fixed