diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d112c6f9..994084a31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ All notable changes to this project will be documented in this file. Also remove the old release workflow. - zookeeper: Remove 3.9.2 ([#1093]). - Remove ubi8-rust-builder image ([#1091]). +- hadoop: Remove `3.3.4` and `3.4.0` ([#1099]). - opa: Remove `0.67.1` ([#1103]). - opa: Remove legacy bundle-builder from container build ([#1103]). - druid: Remove `30.0.0` ([#1110]). @@ -105,6 +106,7 @@ All notable changes to this project will be documented in this file. [#1093]: https://github.com/stackabletech/docker-images/pull/1093 [#1097]: https://github.com/stackabletech/docker-images/pull/1097 [#1098]: https://github.com/stackabletech/docker-images/pull/1098 +[#1099]: https://github.com/stackabletech/docker-images/pull/1099 [#1102]: https://github.com/stackabletech/docker-images/pull/1102 [#1103]: https://github.com/stackabletech/docker-images/pull/1103 [#1106]: https://github.com/stackabletech/docker-images/pull/1106 diff --git a/hadoop/stackable/patches/3.3.4/0001-YARN-11527-Update-node.js.patch b/hadoop/stackable/patches/3.3.4/0001-YARN-11527-Update-node.js.patch deleted file mode 100644 index d50669cc9..000000000 --- a/hadoop/stackable/patches/3.3.4/0001-YARN-11527-Update-node.js.patch +++ /dev/null @@ -1,22 +0,0 @@ -From ebfaedd7b03927237db87a263d16c17b6aea00ad Mon Sep 17 00:00:00 2001 -From: Siegfried Weber -Date: Thu, 21 Dec 2023 13:51:13 +0100 -Subject: YARN-11527: Update node.js - ---- - hadoop-project/pom.xml | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml -index 0b2f6f1715..082834e261 100644 ---- a/hadoop-project/pom.xml -+++ b/hadoop-project/pom.xml -@@ -212,7 +212,7 @@ - 5.3.0 - 2.4.7 - 9.8.1 -- v12.22.1 -+ v14.17.0 - v1.22.5 - 1.10.11 - diff --git a/hadoop/stackable/patches/3.3.4/0002-Allow-overriding-datanode-registration-addresses.patch b/hadoop/stackable/patches/3.3.4/0002-Allow-overriding-datanode-registration-addresses.patch deleted file mode 100644 index f7d355d84..000000000 --- a/hadoop/stackable/patches/3.3.4/0002-Allow-overriding-datanode-registration-addresses.patch +++ /dev/null @@ -1,259 +0,0 @@ -From 570804ae570faed84b98ab67e9ff7534f458caec Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= -Date: Thu, 11 Jan 2024 14:01:02 +0100 -Subject: Allow overriding datanode registration addresses - ---- - .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ - .../blockmanagement/DatanodeManager.java | 43 +++++++----- - .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ - .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- - 4 files changed, 135 insertions(+), 22 deletions(-) - -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -index 7196def422..2c00fb4fb1 100755 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -@@ -139,6 +139,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { - public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; - public static final String DFS_DATANODE_USE_DN_HOSTNAME = 
"dfs.datanode.use.datanode.hostname"; - public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; -+ -+ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; -+ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; -+ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; -+ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; -+ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; -+ - public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; - public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; - public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; -@@ -446,6 +453,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { - public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = - TimeUnit.SECONDS.toMillis(2); - -+ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; -+ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; - public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; - public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; - -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -index 44dffcbed1..54f6d63fa7 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -@@ -179,6 +179,8 @@ public class DatanodeManager { - private boolean hasClusterEverBeenMultiRack = false; - - private final boolean checkIpHostnameInRegistration; -+ private final boolean allowRegistrationAddressOverride; -+ - /** - * Whether we should tell datanodes what to cache in replies to - * heartbeat messages. 
-@@ -326,6 +328,11 @@ public class DatanodeManager { - + ": configured=" + configuredBlockInvalidateLimit - + ", counted=" + countedBlockInvalidateLimit - + ", effected=" + blockInvalidateLimit); -+ this.allowRegistrationAddressOverride = conf.getBoolean( -+ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, -+ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); -+ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY -+ + "=" + allowRegistrationAddressOverride); - this.checkIpHostnameInRegistration = conf.getBoolean( - DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, - DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); -@@ -1133,27 +1140,29 @@ public class DatanodeManager { - */ - public void registerDatanode(DatanodeRegistration nodeReg) - throws DisallowedDatanodeException, UnresolvedTopologyException { -- InetAddress dnAddress = Server.getRemoteIp(); -- if (dnAddress != null) { -- // Mostly called inside an RPC, update ip and peer hostname -- String hostname = dnAddress.getHostName(); -- String ip = dnAddress.getHostAddress(); -- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { -- // Reject registration of unresolved datanode to prevent performance -- // impact of repetitive DNS lookups later. -- final String message = "hostname cannot be resolved (ip=" -- + ip + ", hostname=" + hostname + ")"; -- LOG.warn("Unresolved datanode registration: " + message); -- throw new DisallowedDatanodeException(nodeReg, message); -+ if (!allowRegistrationAddressOverride) { -+ InetAddress dnAddress = Server.getRemoteIp(); -+ if (dnAddress != null) { -+ // Mostly called inside an RPC, update ip and peer hostname -+ String hostname = dnAddress.getHostName(); -+ String ip = dnAddress.getHostAddress(); -+ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { -+ // Reject registration of unresolved datanode to prevent performance -+ // impact of repetitive DNS lookups later. -+ final String message = "hostname cannot be resolved (ip=" -+ + ip + ", hostname=" + hostname + ")"; -+ LOG.warn("Unresolved datanode registration: " + message); -+ throw new DisallowedDatanodeException(nodeReg, message); -+ } -+ // update node registration with the ip and hostname from rpc request -+ nodeReg.setIpAddr(ip); -+ nodeReg.setPeerHostName(hostname); - } -- // update node registration with the ip and hostname from rpc request -- nodeReg.setIpAddr(ip); -- nodeReg.setPeerHostName(hostname); - } -- -+ - try { - nodeReg.setExportedKeys(blockManager.getBlockKeys()); -- -+ - // Checks if the node is not on the hosts list. If it is not, then - // it will be disallowed from registering. 
- if (!hostConfigManager.isIncluded(nodeReg)) { -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -index d61a17e83f..eaf4a6d7c1 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -@@ -99,6 +99,11 @@ public class DNConf { - final boolean syncOnClose; - final boolean encryptDataTransfer; - final boolean connectToDnViaHostname; -+ private final String registeredHostname; -+ private final int registeredDataPort; -+ private final int registeredHttpPort; -+ private final int registeredHttpsPort; -+ private final int registeredIpcPort; - final boolean overwriteDownstreamDerivedQOP; - private final boolean pmemCacheRecoveryEnabled; - -@@ -187,6 +192,11 @@ public class DNConf { - connectToDnViaHostname = getConf().getBoolean( - DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, - DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); -+ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); -+ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); -+ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); -+ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); -+ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); - this.blockReportInterval = getConf().getLong( - DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, - DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); -@@ -360,6 +370,66 @@ public class DNConf { - return connectToDnViaHostname; - } - -+ /** -+ * Returns a hostname to register with the cluster instead of the system -+ * hostname. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected hostname. -+ * -+ * @return null if the system hostname should be used, otherwise a hostname -+ */ -+ public String getRegisteredHostname() { -+ return registeredHostname; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * data port that the node is listening on. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredDataPort() { -+ return registeredDataPort; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * HTTP port that the node is listening on. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredHttpPort() { -+ return registeredHttpPort; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * HTTPS port that the node is listening on. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredHttpsPort() { -+ return registeredHttpsPort; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * IPC port that the node is listening on. 
-+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredIpcPort() { -+ return registeredIpcPort; -+ } -+ - /** - * Returns socket timeout - * -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -index c1507a4512..2ff7c272cf 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -@@ -82,6 +82,7 @@ import java.util.HashSet; - import java.util.Iterator; - import java.util.List; - import java.util.Map; -+import java.util.Optional; - import java.util.Map.Entry; - import java.util.Set; - import java.util.UUID; -@@ -1556,11 +1557,35 @@ public class DataNode extends ReconfigurableBase - NodeType.DATA_NODE); - } - -- DatanodeID dnId = new DatanodeID( -- streamingAddr.getAddress().getHostAddress(), hostName, -- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), -- infoSecurePort, getIpcPort()); -- return new DatanodeRegistration(dnId, storageInfo, -+ String registeredHostname = Optional -+ .ofNullable(dnConf.getRegisteredHostname()) -+ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); -+ int registeredDataPort = dnConf.getRegisteredDataPort(); -+ if (registeredDataPort == -1) { -+ registeredDataPort = getXferPort(); -+ } -+ int registeredHttpPort = dnConf.getRegisteredHttpPort(); -+ if (registeredHttpPort == -1) { -+ registeredHttpPort = getInfoPort(); -+ } -+ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); -+ if (registeredHttpsPort == -1) { -+ registeredHttpsPort = getInfoSecurePort(); -+ } -+ int registeredIpcPort = dnConf.getRegisteredIpcPort(); -+ if (registeredIpcPort == -1) { -+ registeredIpcPort = getIpcPort(); -+ } -+ -+ DatanodeID dnId = new DatanodeID(registeredHostname, -+ registeredHostname, -+ storage.getDatanodeUuid(), -+ registeredDataPort, -+ registeredHttpPort, -+ registeredHttpsPort, -+ registeredIpcPort); -+ -+ return new DatanodeRegistration(dnId, storageInfo, - new ExportedBlockKeys(), VersionInfo.getVersion()); - } - diff --git a/hadoop/stackable/patches/3.3.4/0003-HADOOP-18055-Add-async-profiler.patch b/hadoop/stackable/patches/3.3.4/0003-HADOOP-18055-Add-async-profiler.patch deleted file mode 100644 index f79e2a02a..000000000 --- a/hadoop/stackable/patches/3.3.4/0003-HADOOP-18055-Add-async-profiler.patch +++ /dev/null @@ -1,1014 +0,0 @@ -From f4a68edacf8afbf51c9ac996fa50623dd71c12b9 Mon Sep 17 00:00:00 2001 -From: Siegfried Weber -Date: Tue, 6 Feb 2024 16:10:54 +0100 -Subject: HADOOP-18055: Add async-profiler - ---- - .../org/apache/hadoop/http/HttpServer2.java | 21 + - .../hadoop/http/ProfileOutputServlet.java | 87 ++++ - .../apache/hadoop/http/ProfileServlet.java | 394 ++++++++++++++++++ - .../hadoop/http/ProfilerDisabledServlet.java | 44 ++ - .../org/apache/hadoop/util/ProcessUtils.java | 74 ++++ - .../src/main/resources/core-default.xml | 2 +- - .../src/site/markdown/AsyncProfilerServlet.md | 145 +++++++ - .../http/TestDisabledProfileServlet.java | 95 +++++ - .../hadoop-kms/src/site/markdown/index.md.vm | 5 +- - .../src/site/markdown/ServerSetup.md.vm | 5 +- - hadoop-project/src/site/site.xml | 1 + - 11 files changed, 868 insertions(+), 5 deletions(-) - create 
mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java - create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java - create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java - create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java - create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md - create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java - -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java -index b40d60cf50..a7777f85cc 100644 ---- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java -@@ -27,6 +27,7 @@ import java.net.InetSocketAddress; - import java.net.MalformedURLException; - import java.net.URI; - import java.net.URL; -+import java.nio.file.Files; - import java.nio.file.Path; - import java.nio.file.Paths; - import java.util.List; -@@ -734,6 +735,26 @@ public final class HttpServer2 implements FilterContainer { - - addDefaultServlets(); - addPrometheusServlet(conf); -+ addAsyncProfilerServlet(contexts); -+ } -+ -+ private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException { -+ final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome(); -+ if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) { -+ addServlet("prof", "/prof", ProfileServlet.class); -+ Path tmpDir = Paths.get(ProfileServlet.OUTPUT_DIR); -+ if (Files.notExists(tmpDir)) { -+ Files.createDirectories(tmpDir); -+ } -+ ServletContextHandler genCtx = new ServletContextHandler(contexts, "/prof-output-hadoop"); -+ genCtx.addServlet(ProfileOutputServlet.class, "/*"); -+ genCtx.setResourceBase(tmpDir.toAbsolutePath().toString()); -+ genCtx.setDisplayName("prof-output-hadoop"); -+ } else { -+ addServlet("prof", "/prof", ProfilerDisabledServlet.class); -+ LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property " -+ + "not specified. Disabling /prof endpoint."); -+ } - } - - private void addPrometheusServlet(Configuration conf) { -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java -new file mode 100644 -index 0000000000..1ecc21f375 ---- /dev/null -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java -@@ -0,0 +1,87 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. 
You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.hadoop.http; -+ -+import java.io.File; -+import java.io.IOException; -+import java.util.regex.Pattern; -+import javax.servlet.ServletException; -+import javax.servlet.http.HttpServletRequest; -+import javax.servlet.http.HttpServletResponse; -+ -+import org.eclipse.jetty.servlet.DefaultServlet; -+import org.slf4j.Logger; -+import org.slf4j.LoggerFactory; -+ -+import org.apache.hadoop.classification.InterfaceAudience; -+ -+/** -+ * Servlet to serve files generated by {@link ProfileServlet}. -+ */ -+@InterfaceAudience.Private -+public class ProfileOutputServlet extends DefaultServlet { -+ -+ private static final long serialVersionUID = 1L; -+ -+ private static final Logger LOG = LoggerFactory.getLogger(ProfileOutputServlet.class); -+ // default refresh period 2 sec -+ private static final int REFRESH_PERIOD = 2; -+ // Alphanumeric characters, plus percent (url-encoding), equals, ampersand, dot and hyphen -+ private static final Pattern ALPHA_NUMERIC = Pattern.compile("[a-zA-Z0-9%=&.\\-]*"); -+ -+ @Override -+ protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) -+ throws ServletException, IOException { -+ if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) { -+ resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); -+ ProfileServlet.setResponseHeader(resp); -+ resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!"); -+ return; -+ } -+ -+ String absoluteDiskPath = getServletContext().getRealPath(req.getPathInfo()); -+ File requestedFile = new File(absoluteDiskPath); -+ // async-profiler version 1.4 writes 'Started [cpu] profiling' to output file when profiler is -+ // running which gets replaced by final output. If final output is not ready yet, the file size -+ // will be <100 bytes (in all modes). -+ if (requestedFile.length() < 100) { -+ LOG.info("{} is incomplete. Sending auto-refresh header.", requestedFile); -+ String refreshUrl = req.getRequestURI(); -+ // Rebuild the query string (if we have one) -+ if (req.getQueryString() != null) { -+ refreshUrl += "?" + sanitize(req.getQueryString()); -+ } -+ ProfileServlet.setResponseHeader(resp); -+ resp.setHeader("Refresh", REFRESH_PERIOD + ";" + refreshUrl); -+ resp.getWriter().write("This page will be auto-refreshed every " + REFRESH_PERIOD -+ + " seconds until the output file is ready. Redirecting to " + refreshUrl); -+ } else { -+ super.doGet(req, resp); -+ } -+ } -+ -+ static String sanitize(String input) { -+ // Basic test to try to avoid any XSS attacks or HTML content showing up. -+ // Duplicates HtmlQuoting a little, but avoid destroying ampersand. 
-+ if (ALPHA_NUMERIC.matcher(input).matches()) { -+ return input; -+ } -+ throw new RuntimeException("Non-alphanumeric data found in input, aborting."); -+ } -+} -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -new file mode 100644 -index 0000000000..fc0ec7736e ---- /dev/null -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -@@ -0,0 +1,394 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.hadoop.http; -+ -+import java.io.File; -+import java.io.IOException; -+import java.util.ArrayList; -+import java.util.List; -+import java.util.concurrent.TimeUnit; -+import java.util.concurrent.atomic.AtomicInteger; -+import java.util.concurrent.locks.Lock; -+import java.util.concurrent.locks.ReentrantLock; -+import javax.servlet.http.HttpServlet; -+import javax.servlet.http.HttpServletRequest; -+import javax.servlet.http.HttpServletResponse; -+ -+import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; -+import org.slf4j.Logger; -+import org.slf4j.LoggerFactory; -+ -+import org.apache.hadoop.classification.InterfaceAudience; -+import org.apache.hadoop.util.ProcessUtils; -+ -+/** -+ * Servlet that runs async-profiler as web-endpoint. -+ *
-+ * Following options from async-profiler can be specified as query paramater. -+ * // -e event profiling event: cpu|alloc|lock|cache-misses etc. -+ * // -d duration run profiling for 'duration' seconds (integer) -+ * // -i interval sampling interval in nanoseconds (long) -+ * // -j jstackdepth maximum Java stack depth (integer) -+ * // -b bufsize frame buffer size (long) -+ * // -t profile different threads separately -+ * // -s simple class names instead of FQN -+ * // -o fmt[,fmt...] output format: summary|traces|flat|collapsed|svg|tree|jfr|html -+ * // --width px SVG width pixels (integer) -+ * // --height px SVG frame height pixels (integer) -+ * // --minwidth px skip frames smaller than px (double) -+ * // --reverse generate stack-reversed FlameGraph / Call tree -+ *
-+ * Example: -+ * If Namenode http address is localhost:9870, and ResourceManager http address is localhost:8088, -+ * ProfileServlet running with async-profiler setup can be accessed with -+ * http://localhost:9870/prof and http://localhost:8088/prof for Namenode and ResourceManager -+ * processes respectively. -+ * Deep dive into some params: -+ * - To collect 10 second CPU profile of current process i.e. Namenode (returns FlameGraph svg) -+ * curl "http://localhost:9870/prof" -+ * - To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg) -+ * curl "http://localhost:9870/prof?pid=12345" (For instance, provide pid of Datanode) -+ * - To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg) -+ * curl "http://localhost:9870/prof?pid=12345&duration=30" -+ * - To collect 1 minute CPU profile of current process and output in tree format (html) -+ * curl "http://localhost:9870/prof?output=tree&duration=60" -+ * - To collect 10 second heap allocation profile of current process (returns FlameGraph svg) -+ * curl "http://localhost:9870/prof?event=alloc" -+ * - To collect lock contention profile of current process (returns FlameGraph svg) -+ * curl "http://localhost:9870/prof?event=lock" -+ *
-+ * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) -+ * // Perf events: -+ * // cpu -+ * // page-faults -+ * // context-switches -+ * // cycles -+ * // instructions -+ * // cache-references -+ * // cache-misses -+ * // branches -+ * // branch-misses -+ * // bus-cycles -+ * // L1-dcache-load-misses -+ * // LLC-load-misses -+ * // dTLB-load-misses -+ * // mem:breakpoint -+ * // trace:tracepoint -+ * // Java events: -+ * // alloc -+ * // lock -+ */ -+@InterfaceAudience.Private -+public class ProfileServlet extends HttpServlet { -+ -+ private static final long serialVersionUID = 1L; -+ private static final Logger LOG = LoggerFactory.getLogger(ProfileServlet.class); -+ -+ static final String ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods"; -+ static final String ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin"; -+ private static final String ALLOWED_METHODS = "GET"; -+ private static final String CONTENT_TYPE_TEXT = "text/plain; charset=utf-8"; -+ private static final String ASYNC_PROFILER_HOME_ENV = "ASYNC_PROFILER_HOME"; -+ private static final String ASYNC_PROFILER_HOME_SYSTEM_PROPERTY = "async.profiler.home"; -+ private static final String PROFILER_SCRIPT = "/profiler.sh"; -+ private static final int DEFAULT_DURATION_SECONDS = 10; -+ private static final AtomicInteger ID_GEN = new AtomicInteger(0); -+ -+ static final String OUTPUT_DIR = System.getProperty("java.io.tmpdir") + "/prof-output-hadoop"; -+ -+ private enum Event { -+ -+ CPU("cpu"), -+ ALLOC("alloc"), -+ LOCK("lock"), -+ PAGE_FAULTS("page-faults"), -+ CONTEXT_SWITCHES("context-switches"), -+ CYCLES("cycles"), -+ INSTRUCTIONS("instructions"), -+ CACHE_REFERENCES("cache-references"), -+ CACHE_MISSES("cache-misses"), -+ BRANCHES("branches"), -+ BRANCH_MISSES("branch-misses"), -+ BUS_CYCLES("bus-cycles"), -+ L1_DCACHE_LOAD_MISSES("L1-dcache-load-misses"), -+ LLC_LOAD_MISSES("LLC-load-misses"), -+ DTLB_LOAD_MISSES("dTLB-load-misses"), -+ MEM_BREAKPOINT("mem:breakpoint"), -+ TRACE_TRACEPOINT("trace:tracepoint"); -+ -+ private final String internalName; -+ -+ Event(final String internalName) { -+ this.internalName = internalName; -+ } -+ -+ public String getInternalName() { -+ return internalName; -+ } -+ -+ public static Event fromInternalName(final String name) { -+ for (Event event : values()) { -+ if (event.getInternalName().equalsIgnoreCase(name)) { -+ return event; -+ } -+ } -+ -+ return null; -+ } -+ } -+ -+ private enum Output { -+ SUMMARY, -+ TRACES, -+ FLAT, -+ COLLAPSED, -+ // No SVG in 2.x asyncprofiler. -+ SVG, -+ TREE, -+ JFR, -+ // In 2.x asyncprofiler, this is how you get flamegraphs. 
-+ HTML -+ } -+ -+ private final Lock profilerLock = new ReentrantLock(); -+ private transient volatile Process process; -+ private final String asyncProfilerHome; -+ private Integer pid; -+ -+ public ProfileServlet() { -+ this.asyncProfilerHome = getAsyncProfilerHome(); -+ this.pid = ProcessUtils.getPid(); -+ LOG.info("Servlet process PID: {} asyncProfilerHome: {}", pid, asyncProfilerHome); -+ } -+ -+ @Override -+ protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) -+ throws IOException { -+ if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) { -+ resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); -+ setResponseHeader(resp); -+ resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!"); -+ return; -+ } -+ -+ // make sure async profiler home is set -+ if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { -+ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); -+ setResponseHeader(resp); -+ resp.getWriter().write("ASYNC_PROFILER_HOME env is not set.\n\n" -+ + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" -+ + "environment is properly configured."); -+ return; -+ } -+ -+ // if pid is explicitly specified, use it else default to current process -+ pid = getInteger(req, "pid", pid); -+ -+ // if pid is not specified in query param and if current process pid cannot be determined -+ if (pid == null) { -+ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); -+ setResponseHeader(resp); -+ resp.getWriter().write( -+ "'pid' query parameter unspecified or unable to determine PID of current process."); -+ return; -+ } -+ -+ final int duration = getInteger(req, "duration", DEFAULT_DURATION_SECONDS); -+ final Output output = getOutput(req); -+ final Event event = getEvent(req); -+ final Long interval = getLong(req, "interval"); -+ final Integer jstackDepth = getInteger(req, "jstackdepth", null); -+ final Long bufsize = getLong(req, "bufsize"); -+ final boolean thread = req.getParameterMap().containsKey("thread"); -+ final boolean simple = req.getParameterMap().containsKey("simple"); -+ final Integer width = getInteger(req, "width", null); -+ final Integer height = getInteger(req, "height", null); -+ final Double minwidth = getMinWidth(req); -+ final boolean reverse = req.getParameterMap().containsKey("reverse"); -+ -+ if (process == null || !process.isAlive()) { -+ try { -+ int lockTimeoutSecs = 3; -+ if (profilerLock.tryLock(lockTimeoutSecs, TimeUnit.SECONDS)) { -+ try { -+ File outputFile = new File(OUTPUT_DIR, -+ "async-prof-pid-" + pid + "-" + event.name().toLowerCase() + "-" + ID_GEN -+ .incrementAndGet() + "." 
+ output.name().toLowerCase()); -+ List cmd = new ArrayList<>(); -+ cmd.add(asyncProfilerHome + PROFILER_SCRIPT); -+ cmd.add("-e"); -+ cmd.add(event.getInternalName()); -+ cmd.add("-d"); -+ cmd.add("" + duration); -+ cmd.add("-o"); -+ cmd.add(output.name().toLowerCase()); -+ cmd.add("-f"); -+ cmd.add(outputFile.getAbsolutePath()); -+ if (interval != null) { -+ cmd.add("-i"); -+ cmd.add(interval.toString()); -+ } -+ if (jstackDepth != null) { -+ cmd.add("-j"); -+ cmd.add(jstackDepth.toString()); -+ } -+ if (bufsize != null) { -+ cmd.add("-b"); -+ cmd.add(bufsize.toString()); -+ } -+ if (thread) { -+ cmd.add("-t"); -+ } -+ if (simple) { -+ cmd.add("-s"); -+ } -+ if (width != null) { -+ cmd.add("--width"); -+ cmd.add(width.toString()); -+ } -+ if (height != null) { -+ cmd.add("--height"); -+ cmd.add(height.toString()); -+ } -+ if (minwidth != null) { -+ cmd.add("--minwidth"); -+ cmd.add(minwidth.toString()); -+ } -+ if (reverse) { -+ cmd.add("--reverse"); -+ } -+ cmd.add(pid.toString()); -+ process = ProcessUtils.runCmdAsync(cmd); -+ -+ // set response and set refresh header to output location -+ setResponseHeader(resp); -+ resp.setStatus(HttpServletResponse.SC_ACCEPTED); -+ String relativeUrl = "/prof-output-hadoop/" + outputFile.getName(); -+ resp.getWriter().write("Started [" + event.getInternalName() -+ + "] profiling. This page will automatically redirect to " + relativeUrl + " after " -+ + duration + " seconds. " -+ + "If empty diagram and Linux 4.6+, see 'Basic Usage' section on the Async " -+ + "Profiler Home Page, https://github.com/jvm-profiling-tools/async-profiler." -+ + "\n\nCommand:\n" + Joiner.on(" ").join(cmd)); -+ -+ // to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified -+ // via url param -+ int refreshDelay = getInteger(req, "refreshDelay", 0); -+ -+ // instead of sending redirect, set auto-refresh so that browsers will refresh -+ // with redirected url -+ resp.setHeader("Refresh", (duration + refreshDelay) + ";" + relativeUrl); -+ resp.getWriter().flush(); -+ } finally { -+ profilerLock.unlock(); -+ } -+ } else { -+ setResponseHeader(resp); -+ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); -+ resp.getWriter() -+ .write("Unable to acquire lock. Another instance of profiler might be running."); -+ LOG.warn("Unable to acquire lock in {} seconds. 
Another instance of profiler might be" -+ + " running.", lockTimeoutSecs); -+ } -+ } catch (InterruptedException e) { -+ LOG.warn("Interrupted while acquiring profile lock.", e); -+ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); -+ } -+ } else { -+ setResponseHeader(resp); -+ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); -+ resp.getWriter().write("Another instance of profiler is already running."); -+ } -+ } -+ -+ private Integer getInteger(final HttpServletRequest req, final String param, -+ final Integer defaultValue) { -+ final String value = req.getParameter(param); -+ if (value != null) { -+ try { -+ return Integer.valueOf(value); -+ } catch (NumberFormatException e) { -+ return defaultValue; -+ } -+ } -+ return defaultValue; -+ } -+ -+ private Long getLong(final HttpServletRequest req, final String param) { -+ final String value = req.getParameter(param); -+ if (value != null) { -+ try { -+ return Long.valueOf(value); -+ } catch (NumberFormatException e) { -+ return null; -+ } -+ } -+ return null; -+ } -+ -+ private Double getMinWidth(final HttpServletRequest req) { -+ final String value = req.getParameter("minwidth"); -+ if (value != null) { -+ try { -+ return Double.valueOf(value); -+ } catch (NumberFormatException e) { -+ return null; -+ } -+ } -+ return null; -+ } -+ -+ private Event getEvent(final HttpServletRequest req) { -+ final String eventArg = req.getParameter("event"); -+ if (eventArg != null) { -+ Event event = Event.fromInternalName(eventArg); -+ return event == null ? Event.CPU : event; -+ } -+ return Event.CPU; -+ } -+ -+ private Output getOutput(final HttpServletRequest req) { -+ final String outputArg = req.getParameter("output"); -+ if (req.getParameter("output") != null) { -+ try { -+ return Output.valueOf(outputArg.trim().toUpperCase()); -+ } catch (IllegalArgumentException e) { -+ return Output.HTML; -+ } -+ } -+ return Output.HTML; -+ } -+ -+ static void setResponseHeader(final HttpServletResponse response) { -+ response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, ALLOWED_METHODS); -+ response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*"); -+ response.setContentType(CONTENT_TYPE_TEXT); -+ } -+ -+ static String getAsyncProfilerHome() { -+ String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV); -+ // if ENV is not set, see if -Dasync.profiler.home=/path/to/async/profiler/home is set -+ if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { -+ asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY); -+ } -+ -+ return asyncProfilerHome; -+ } -+ -+} -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java -new file mode 100644 -index 0000000000..459485ffa5 ---- /dev/null -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java -@@ -0,0 +1,44 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. 
You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.hadoop.http; -+ -+import java.io.IOException; -+import javax.servlet.http.HttpServlet; -+import javax.servlet.http.HttpServletRequest; -+import javax.servlet.http.HttpServletResponse; -+ -+import org.apache.hadoop.classification.InterfaceAudience; -+ -+/** -+ * Servlet for disabled async-profiler. -+ */ -+@InterfaceAudience.Private -+public class ProfilerDisabledServlet extends HttpServlet { -+ -+ @Override -+ protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) -+ throws IOException { -+ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); -+ ProfileServlet.setResponseHeader(resp); -+ resp.getWriter().write("The profiler servlet was disabled at startup.\n\n" -+ + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" -+ + "environment is properly configured."); -+ } -+ -+} -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java -new file mode 100644 -index 0000000000..cf653b9c91 ---- /dev/null -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java -@@ -0,0 +1,74 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.hadoop.util; -+ -+import java.io.IOException; -+import java.lang.management.ManagementFactory; -+import java.util.List; -+ -+import org.slf4j.Logger; -+import org.slf4j.LoggerFactory; -+ -+import org.apache.hadoop.classification.InterfaceAudience; -+ -+/** -+ * Process related utilities. -+ */ -+@InterfaceAudience.Private -+public final class ProcessUtils { -+ -+ private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); -+ -+ private ProcessUtils() { -+ // no-op -+ } -+ -+ public static Integer getPid() { -+ // JVM_PID can be exported in service start script -+ String pidStr = System.getenv("JVM_PID"); -+ -+ // In case if it is not set correctly, fallback to mxbean which is implementation specific. 
-+ if (pidStr == null || pidStr.trim().isEmpty()) { -+ String name = ManagementFactory.getRuntimeMXBean().getName(); -+ if (name != null) { -+ int idx = name.indexOf("@"); -+ if (idx != -1) { -+ pidStr = name.substring(0, name.indexOf("@")); -+ } -+ } -+ } -+ try { -+ if (pidStr != null) { -+ return Integer.valueOf(pidStr); -+ } -+ } catch (NumberFormatException ignored) { -+ // ignore -+ } -+ return null; -+ } -+ -+ public static Process runCmdAsync(List cmd) { -+ try { -+ LOG.info("Running command async: {}", cmd); -+ return new ProcessBuilder(cmd).inheritIO().start(); -+ } catch (IOException e) { -+ throw new IllegalStateException(e); -+ } -+ } -+} -diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml -index f94fdebd03..56bec769c1 100644 ---- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml -+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml -@@ -69,7 +69,7 @@ - false - - Indicates if administrator ACLs are required to access -- instrumentation servlets (JMX, METRICS, CONF, STACKS). -+ instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). - - - -diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md b/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md -new file mode 100644 -index 0000000000..4b93cc219a ---- /dev/null -+++ b/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md -@@ -0,0 +1,145 @@ -+ -+ -+Async Profiler Servlet for Hadoop -+======================================== -+ -+ -+ -+Purpose -+------- -+ -+This document describes how to configure and use async profiler -+with Hadoop applications. -+Async profiler is a low overhead sampling profiler for Java that -+does not suffer from Safepoint bias problem. It features -+HotSpot-specific APIs to collect stack traces and to track memory -+allocations. The profiler works with OpenJDK, Oracle JDK and other -+Java runtimes based on the HotSpot JVM. -+ -+Hadoop profiler servlet supports Async Profiler major versions -+1.x and 2.x. -+ -+Prerequisites -+------------- -+ -+Make sure Hadoop is installed, configured and setup correctly. -+For more information see: -+ -+* [Single Node Setup](./SingleCluster.html) for first-time users. -+* [Cluster Setup](./ClusterSetup.html) for large, distributed clusters. -+ -+Go to https://github.com/jvm-profiling-tools/async-profiler, -+download a release appropriate for your platform, and install -+on every cluster host. -+ -+Set `ASYNC_PROFILER_HOME` in the environment (put it in hadoop-env.sh) -+to the root directory of the async-profiler install location, or pass -+it on the Hadoop daemon's command line as a system property as -+`-Dasync.profiler.home=/path/to/async-profiler`. -+ -+ -+Usage -+-------- -+ -+Once the prerequisites have been satisfied, access to the async-profiler -+is available by using Namenode or ResourceManager UI. -+ -+Following options from async-profiler can be specified as query paramater. -+* `-e event` profiling event: cpu|alloc|lock|cache-misses etc. 
-+* `-d duration` run profiling for 'duration' seconds (integer) -+* `-i interval` sampling interval in nanoseconds (long) -+* `-j jstackdepth` maximum Java stack depth (integer) -+* `-b bufsize` frame buffer size (long) -+* `-t` profile different threads separately -+* `-s` simple class names instead of FQN -+* `-o fmt[,fmt...]` output format: summary|traces|flat|collapsed|svg|tree|jfr|html -+* `--width px` SVG width pixels (integer) -+* `--height px` SVG frame height pixels (integer) -+* `--minwidth px` skip frames smaller than px (double) -+* `--reverse` generate stack-reversed FlameGraph / Call tree -+ -+ -+Example: -+If Namenode http address is localhost:9870, and ResourceManager http -+address is localhost:8088, ProfileServlet running with async-profiler -+setup can be accessed with http://localhost:9870/prof and -+http://localhost:8088/prof for Namenode and ResourceManager processes -+respectively. -+ -+Diving deep into some params: -+ -+* To collect 10 second CPU profile of current process -+ (returns FlameGraph svg) -+ * `curl http://localhost:9870/prof` (FlameGraph svg for Namenode) -+ * `curl http://localhost:8088/prof` (FlameGraph svg for ResourceManager) -+* To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg) -+ * `curl http://localhost:9870/prof?pid=12345` (For instance, provide -+ pid of Datanode here) -+* To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg) -+ * `curl http://localhost:9870/prof?pid=12345&duration=30` -+* To collect 1 minute CPU profile of current process and output in tree -+ format (html) -+ * `curl http://localhost:9870/prof?output=tree&duration=60` -+* To collect 10 second heap allocation profile of current process -+ (returns FlameGraph svg) -+ * `curl http://localhost:9870/prof?event=alloc` -+* To collect lock contention profile of current process -+ (returns FlameGraph svg) -+ * `curl http://localhost:9870/prof?event=lock` -+ -+ -+The following event types are supported by async-profiler. -+Use the 'event' parameter to specify. Default is 'cpu'. -+Not all operating systems will support all types. -+ -+Perf events: -+ -+* cpu -+* page-faults -+* context-switches -+* cycles -+* instructions -+* cache-references -+* cache-misses -+* branches -+* branch-misses -+* bus-cycles -+* L1-dcache-load-misses -+* LLC-load-misses -+* dTLB-load-misses -+ -+Java events: -+ -+* alloc -+* lock -+ -+The following output formats are supported. -+Use the 'output' parameter to specify. Default is 'flamegraph'. -+ -+Output formats: -+ -+* summary: A dump of basic profiling statistics. -+* traces: Call traces. -+* flat: Flat profile (top N hot methods). -+* collapsed: Collapsed call traces in the format used by FlameGraph -+ script. This is a collection of call stacks, where each line is a -+ semicolon separated list of frames followed by a counter. -+* svg: FlameGraph in SVG format. -+* tree: Call tree in HTML format. -+* jfr: Call traces in Java Flight Recorder format. -+ -+The 'duration' parameter specifies how long to collect trace data -+before generating output, specified in seconds. The default is 10 seconds. 
-+ -diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java -new file mode 100644 -index 0000000000..ce068bb6f1 ---- /dev/null -+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java -@@ -0,0 +1,95 @@ -+/* -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+package org.apache.hadoop.http; -+ -+import java.io.IOException; -+import java.net.HttpURLConnection; -+import java.net.URL; -+import javax.servlet.http.HttpServletResponse; -+ -+import org.junit.AfterClass; -+import org.junit.BeforeClass; -+import org.junit.Test; -+ -+/** -+ * Small test to cover default disabled prof endpoint. -+ */ -+public class TestDisabledProfileServlet extends HttpServerFunctionalTest { -+ -+ private static HttpServer2 server; -+ private static URL baseUrl; -+ -+ @BeforeClass -+ public static void setup() throws Exception { -+ server = createTestServer(); -+ server.start(); -+ baseUrl = getServerURL(server); -+ } -+ -+ @AfterClass -+ public static void cleanup() throws Exception { -+ server.stop(); -+ } -+ -+ @Test -+ public void testQuery() throws Exception { -+ try { -+ readOutput(new URL(baseUrl, "/prof")); -+ throw new IllegalStateException("Should not reach here"); -+ } catch (IOException e) { -+ assertTrue(e.getMessage() -+ .contains(HttpServletResponse.SC_INTERNAL_SERVER_ERROR + " for URL: " + baseUrl)); -+ } -+ -+ // CORS headers -+ HttpURLConnection conn = -+ (HttpURLConnection) new URL(baseUrl, "/prof").openConnection(); -+ assertEquals("GET", conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_METHODS)); -+ assertNotNull(conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_ORIGIN)); -+ conn.disconnect(); -+ } -+ -+ @Test -+ public void testRequestMethods() throws IOException { -+ HttpURLConnection connection = getConnection("PUT"); -+ assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, -+ connection.getResponseCode()); -+ connection.disconnect(); -+ connection = getConnection("POST"); -+ assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, -+ connection.getResponseCode()); -+ connection.disconnect(); -+ connection = getConnection("DELETE"); -+ assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, -+ connection.getResponseCode()); -+ connection.disconnect(); -+ connection = getConnection("GET"); -+ assertEquals("Unexpected response code", HttpServletResponse.SC_INTERNAL_SERVER_ERROR, -+ connection.getResponseCode()); -+ connection.disconnect(); -+ } -+ -+ private HttpURLConnection getConnection(final String method) 
throws IOException { -+ URL url = new URL(baseUrl, "/prof"); -+ HttpURLConnection conn = (HttpURLConnection) url.openConnection(); -+ conn.setRequestMethod(method); -+ return conn; -+ } -+ -+} -diff --git a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm -index 6ea21d5cf4..09375d5aab 100644 ---- a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm -+++ b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm -@@ -1208,9 +1208,10 @@ Name | Description - /logs | Display log files - /stacks | Display JVM stacks - /static/index.html | The static home page -+/prof | Async Profiler endpoint - - To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`, --and `/stacks`, configure the following properties in `kms-site.xml`: -+`/stacks` and `/prof`, configure the following properties in `kms-site.xml`: - - ```xml - -@@ -1224,7 +1225,7 @@ and `/stacks`, configure the following properties in `kms-site.xml`: - true - - Indicates if administrator ACLs are required to access -- instrumentation servlets (JMX, METRICS, CONF, STACKS). -+ instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). - - - -diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm -index 2d0a5b8cd2..e97de0275c 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm -+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm -@@ -162,9 +162,10 @@ Name | Description - /logs | Display log files - /stacks | Display JVM stacks - /static/index.html | The static home page -+/prof | Async Profiler endpoint - - To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`, --and `/stacks`, configure the following properties in `httpfs-site.xml`: -+`/stacks` and `/prof`, configure the following properties in `httpfs-site.xml`: - - ```xml - -@@ -178,7 +179,7 @@ and `/stacks`, configure the following properties in `httpfs-site.xml`: - true - - Indicates if administrator ACLs are required to access -- instrumentation servlets (JMX, METRICS, CONF, STACKS). -+ instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). - - - -diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml -index e2d149da2e..c5413d9089 100644 ---- a/hadoop-project/src/site/site.xml -+++ b/hadoop-project/src/site/site.xml -@@ -74,6 +74,7 @@ - - - -+ - - -
diff --git a/hadoop/stackable/patches/3.3.4/0004-Backport-HADOOP-18077.patch b/hadoop/stackable/patches/3.3.4/0004-Backport-HADOOP-18077.patch deleted file mode 100644 index 4f4712b40..000000000 --- a/hadoop/stackable/patches/3.3.4/0004-Backport-HADOOP-18077.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 0ea496a764360c0d4143f9ee764b9c483ddf6d34 Mon Sep 17 00:00:00 2001 -From: Siegfried Weber -Date: Tue, 6 Feb 2024 16:10:54 +0100 -Subject: Backport HADOOP-18077 - ---- - .../src/main/java/org/apache/hadoop/http/HttpServer2.java | 6 ++++-- - .../org/apache/hadoop/http/ProfilerDisabledServlet.java | 8 +++++++- - 2 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java -index a7777f85cc..bbe0d9993d 100644 ---- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java -@@ -735,10 +735,11 @@ public final class HttpServer2 implements FilterContainer { - - addDefaultServlets(); - addPrometheusServlet(conf); -- addAsyncProfilerServlet(contexts); -+ addAsyncProfilerServlet(contexts, conf); - } - -- private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException { -+ private void addAsyncProfilerServlet(ContextHandlerCollection contexts, Configuration conf) -+ throws IOException { - final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome(); - if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) { - addServlet("prof", "/prof", ProfileServlet.class); -@@ -750,6 +751,7 @@ public final class HttpServer2 implements FilterContainer { - genCtx.addServlet(ProfileOutputServlet.class, "/*"); - genCtx.setResourceBase(tmpDir.toAbsolutePath().toString()); - genCtx.setDisplayName("prof-output-hadoop"); -+ setContextAttributes(genCtx, conf); - } else { - addServlet("prof", "/prof", ProfilerDisabledServlet.class); - LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property " -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java -index 459485ffa5..c488b57499 100644 ---- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java -@@ -36,9 +36,15 @@ public class ProfilerDisabledServlet extends HttpServlet { - throws IOException { - resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - ProfileServlet.setResponseHeader(resp); -+ // TODO : Replace github.com link with -+ // https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ -+ // AsyncProfilerServlet.html once Async profiler changes are released -+ // in 3.x (3.4.0 as of today). - resp.getWriter().write("The profiler servlet was disabled at startup.\n\n" - + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" -- + "environment is properly configured."); -+ + "environment is properly configured. 
\n\n" -+ + "For more details, please refer to: https://github.com/apache/hadoop/blob/trunk/" -+ + "hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md"); - } - - } diff --git a/hadoop/stackable/patches/3.3.4/0005-Async-profiler-also-grab-itimer-events.patch b/hadoop/stackable/patches/3.3.4/0005-Async-profiler-also-grab-itimer-events.patch deleted file mode 100644 index 1f2694209..000000000 --- a/hadoop/stackable/patches/3.3.4/0005-Async-profiler-also-grab-itimer-events.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 395c0da87fd16ca8e00febecbabdee1fb3f48895 Mon Sep 17 00:00:00 2001 -From: Siegfried Weber -Date: Tue, 6 Feb 2024 16:10:54 +0100 -Subject: Async-profiler: also grab itimer events - ---- - .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -index fc0ec7736e..e324ad6d49 100644 ---- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; - * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) - * // Perf events: - * // cpu -+ * // itimer - * // page-faults - * // context-switches - * // cycles -@@ -115,6 +116,7 @@ public class ProfileServlet extends HttpServlet { - private enum Event { - - CPU("cpu"), -+ ITIMER("itimer"), - ALLOC("alloc"), - LOCK("lock"), - PAGE_FAULTS("page-faults"), diff --git a/hadoop/stackable/patches/3.3.4/0006-HDFS-17378-Fix-missing-operationType-for-some-operat.patch b/hadoop/stackable/patches/3.3.4/0006-HDFS-17378-Fix-missing-operationType-for-some-operat.patch deleted file mode 100644 index 057158e41..000000000 --- a/hadoop/stackable/patches/3.3.4/0006-HDFS-17378-Fix-missing-operationType-for-some-operat.patch +++ /dev/null @@ -1,201 +0,0 @@ -From b37250b77291531fea062ae1dc85429e95d854d1 Mon Sep 17 00:00:00 2001 -From: Sebastian Bernauer -Date: Thu, 15 Feb 2024 15:33:43 +0100 -Subject: HDFS-17378: Fix missing operationType for some operations in - authorizer - ---- - .../hdfs/server/namenode/FSNamesystem.java | 41 +++++++++++-------- - 1 file changed, 24 insertions(+), 17 deletions(-) - -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java -index 243f62295c..ba3caa6b6c 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java -@@ -2501,15 +2501,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - * @throws IOException - */ - BlockStoragePolicy getStoragePolicy(String src) throws IOException { -+ final String operationName = "getStoragePolicy"; - checkOperation(OperationCategory.READ); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - readLock(); - try { - checkOperation(OperationCategory.READ); - return FSDirAttrOp.getStoragePolicy(dir, pc, blockManager, src); - } 
finally { -- readUnlock("getStoragePolicy"); -+ readUnlock(operationName); - } - } - -@@ -2529,15 +2530,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - } - - long getPreferredBlockSize(String src) throws IOException { -+ final String operationName = "getPreferredBlockSize"; - checkOperation(OperationCategory.READ); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - readLock(); - try { - checkOperation(OperationCategory.READ); - return FSDirAttrOp.getPreferredBlockSize(dir, pc, src); - } finally { -- readUnlock("getPreferredBlockSize"); -+ readUnlock(operationName); - } - } - -@@ -2590,7 +2592,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - boolean createParent, short replication, long blockSize, - CryptoProtocolVersion[] supportedVersions, String ecPolicyName, - String storagePolicy, boolean logRetryCache) throws IOException { -- - HdfsFileStatus status; - try { - status = startFileInt(src, permissions, holder, clientMachine, flag, -@@ -2610,6 +2611,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - long blockSize, CryptoProtocolVersion[] supportedVersions, - String ecPolicyName, String storagePolicy, boolean logRetryCache) - throws IOException { -+ final String operationName = "create"; - if (NameNode.stateChangeLog.isDebugEnabled()) { - StringBuilder builder = new StringBuilder(); - builder.append("DIR* NameSystem.startFile: src=").append(src) -@@ -2647,7 +2649,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -2711,7 +2713,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - dir.writeUnlock(); - } - } finally { -- writeUnlock("create"); -+ writeUnlock(operationName); - // There might be transactions logged while trying to recover the lease. - // They need to be sync'ed even when an exception was thrown. - if (!skipSync) { -@@ -2740,10 +2742,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - */ - boolean recoverLease(String src, String holder, String clientMachine) - throws IOException { -+ final String operationName = "recoverLease"; - boolean skipSync = false; - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -2764,7 +2767,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - skipSync = true; - throw se; - } finally { -- writeUnlock("recoverLease"); -+ writeUnlock(operationName); - // There might be transactions logged while trying to recover the lease. - // They need to be sync'ed even when an exception was thrown. 
- if (!skipSync) { -@@ -2981,6 +2984,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - final Set excludes, - final int numAdditionalNodes, final String clientName - ) throws IOException { -+ final String operationName = "getAdditionalDatanode"; - //check if the feature is enabled - dtpReplaceDatanodeOnFailure.checkEnabled(); - -@@ -2992,7 +2996,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - final BlockType blockType; - checkOperation(OperationCategory.READ); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - readLock(); - try { - checkOperation(OperationCategory.READ); -@@ -3015,7 +3019,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", - src, fileId, blk, clientName, clientMachine)); - } finally { -- readUnlock("getAdditionalDatanode"); -+ readUnlock(operationName); - } - - if (clientnode == null) { -@@ -3037,11 +3041,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - */ - void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) - throws IOException { -+ final String operationName = "abandonBlock"; - NameNode.stateChangeLog.debug( - "BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -3050,7 +3055,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} is " + - "removed from pendingCreates", b); - } finally { -- writeUnlock("abandonBlock"); -+ writeUnlock(operationName); - } - getEditLog().logSync(); - } -@@ -3104,10 +3109,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - boolean completeFile(final String src, String holder, - ExtendedBlock last, long fileId) - throws IOException { -+ final String operationName = "completeFile"; - boolean success = false; - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -3115,7 +3121,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, - fileId); - } finally { -- writeUnlock("completeFile"); -+ writeUnlock(operationName); - } - getEditLog().logSync(); - if (success) { -@@ -3536,10 +3542,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - */ - void fsync(String src, long fileId, String clientName, long lastBlockLength) - throws IOException { -+ final String operationName = "fsync"; - NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -3553,7 +3560,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - } - FSDirWriteFileOp.persistBlocks(dir, 
src, pendingFile, false); - } finally { -- writeUnlock("fsync"); -+ writeUnlock(operationName); - } - getEditLog().logSync(); - } diff --git a/hadoop/stackable/patches/3.3.4/0007-Bump-Snappy-version-to-fix-CVEs.patch b/hadoop/stackable/patches/3.3.4/0007-Bump-Snappy-version-to-fix-CVEs.patch deleted file mode 100644 index f6d30bc74..000000000 --- a/hadoop/stackable/patches/3.3.4/0007-Bump-Snappy-version-to-fix-CVEs.patch +++ /dev/null @@ -1,22 +0,0 @@ -From ad83ab139ffffabd99549ee5207a116f7acc7cf6 Mon Sep 17 00:00:00 2001 -From: Andrew Kenworthy -Date: Thu, 16 May 2024 16:44:14 +0200 -Subject: Bump Snappy version to fix CVEs - ---- - hadoop-project/pom.xml | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml -index 082834e261..70fb9a3b3f 100644 ---- a/hadoop-project/pom.xml -+++ b/hadoop-project/pom.xml -@@ -142,7 +142,7 @@ - 3.2.4 - 3.10.6.Final - 4.1.77.Final -- 1.1.8.2 -+ 1.1.10.4 - 1.7.1 - - diff --git a/hadoop/stackable/patches/3.3.4/0008-Add-CycloneDX-plugin.patch b/hadoop/stackable/patches/3.3.4/0008-Add-CycloneDX-plugin.patch deleted file mode 100644 index 864b35902..000000000 --- a/hadoop/stackable/patches/3.3.4/0008-Add-CycloneDX-plugin.patch +++ /dev/null @@ -1,46 +0,0 @@ -From d7fa1e47e724f2bc6f22456a0b7fbc509629d285 Mon Sep 17 00:00:00 2001 -From: Lukas Voetmand -Date: Fri, 6 Sep 2024 17:53:52 +0200 -Subject: Add CycloneDX plugin - ---- - pom.xml | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/pom.xml b/pom.xml -index f4e435c749..f0502184a2 100644 ---- a/pom.xml -+++ b/pom.xml -@@ -116,6 +116,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - 1.4.3 - 4.2.2 - 4.2.0 -+ 2.8.0 - - bash - -@@ -491,6 +492,24 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - com.github.spotbugs - spotbugs-maven-plugin - -+ -+ org.cyclonedx -+ cyclonedx-maven-plugin -+ ${cyclonedx.version} -+ -+ application -+ 1.5 -+ false -+ -+ -+ -+ package -+ -+ makeBom -+ -+ -+ -+ - - - diff --git a/hadoop/stackable/patches/3.3.4/0009-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch b/hadoop/stackable/patches/3.3.4/0009-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch deleted file mode 100644 index 633a5339e..000000000 --- a/hadoop/stackable/patches/3.3.4/0009-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch +++ /dev/null @@ -1,973 +0,0 @@ -From cfc358b6e36d9565076b325a2153be24bbf348ba Mon Sep 17 00:00:00 2001 -From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> -Date: Fri, 7 Jun 2024 19:03:23 +0530 -Subject: HADOOP-18516: [ABFS][Authentication] Support Fixed SAS Token for ABFS - Authentication (#6552) - -Contributed by Anuj Modi ---- - .../hadoop/fs/azurebfs/AbfsConfiguration.java | 69 +++++-- - .../fs/azurebfs/AzureBlobFileSystem.java | 3 +- - .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- - .../azurebfs/constants/ConfigurationKeys.java | 5 +- - .../fs/azurebfs/services/AbfsClient.java | 8 +- - .../services/FixedSASTokenProvider.java | 65 +++++++ - .../hadoop-azure/src/site/markdown/abfs.md | 149 +++++++++++--- - .../azurebfs/AbstractAbfsIntegrationTest.java | 23 ++- - .../ITestAzureBlobFileSystemChooseSAS.java | 182 ++++++++++++++++++ - .../MockDelegationSASTokenProvider.java | 2 +- - .../extensions/MockSASTokenProvider.java | 16 +- - .../azurebfs/utils/AccountSASGenerator.java | 103 ++++++++++ - .../fs/azurebfs/utils/SASGenerator.java | 34 +++- - .../azurebfs/utils/ServiceSASGenerator.java | 15 +- 
- 14 files changed, 607 insertions(+), 69 deletions(-) - create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java - create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java - create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java - -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java -index 50cc57447f..7e38da987e 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java -@@ -59,6 +59,7 @@ import org.apache.hadoop.fs.azurebfs.oauth2.UserPasswordTokenProvider; - import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; - import org.apache.hadoop.fs.azurebfs.services.AuthType; - import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; -+import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; - import org.apache.hadoop.fs.azurebfs.services.KeyProvider; - import org.apache.hadoop.fs.azurebfs.services.SimpleKeyProvider; - import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; -@@ -876,33 +877,63 @@ public class AbfsConfiguration{ - } - } - -+ /** -+ * Returns the SASTokenProvider implementation to be used to generate SAS token.
-+ * Users can choose between a custom implementation of {@link SASTokenProvider}
-+ * or an in house implementation {@link FixedSASTokenProvider}.
-+ * For Custom implementation "fs.azure.sas.token.provider.type" needs to be provided.
-+ * For Fixed SAS Token use "fs.azure.sas.fixed.token" needs to be provided.
-+ * In case both are provided, Preference will be given to Custom implementation.
-+ * Avoid using a custom tokenProvider implementation just to read the configured
-+ * fixed token, as this could create confusion. Also,implementing the SASTokenProvider
-+ * requires relying on the raw configurations. It is more stable to depend on
-+ * the AbfsConfiguration with which a filesystem is initialized, and eliminate
-+ * chances of dynamic modifications and spurious situations.
-+ * @return sasTokenProvider object based on configurations provided -+ * @throws AzureBlobFileSystemException -+ */ - public SASTokenProvider getSASTokenProvider() throws AzureBlobFileSystemException { - AuthType authType = getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey); - if (authType != AuthType.SAS) { - throw new SASTokenProviderException(String.format( -- "Invalid auth type: %s is being used, expecting SAS", authType)); -+ "Invalid auth type: %s is being used, expecting SAS.", authType)); - } - - try { -- String configKey = FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; -- Class sasTokenProviderClass = -- getTokenProviderClass(authType, configKey, null, -- SASTokenProvider.class); -- -- Preconditions.checkArgument(sasTokenProviderClass != null, -- String.format("The configuration value for \"%s\" is invalid.", configKey)); -- -- SASTokenProvider sasTokenProvider = ReflectionUtils -- .newInstance(sasTokenProviderClass, rawConfig); -- Preconditions.checkArgument(sasTokenProvider != null, -- String.format("Failed to initialize %s", sasTokenProviderClass)); -- -- LOG.trace("Initializing {}", sasTokenProviderClass.getName()); -- sasTokenProvider.initialize(rawConfig, accountName); -- LOG.trace("{} init complete", sasTokenProviderClass.getName()); -- return sasTokenProvider; -+ Class customSasTokenProviderImplementation = -+ getTokenProviderClass(authType, FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, -+ null, SASTokenProvider.class); -+ String configuredFixedToken = this.getTrimmedPasswordString(FS_AZURE_SAS_FIXED_TOKEN, EMPTY_STRING); -+ -+ if (customSasTokenProviderImplementation == null && configuredFixedToken.isEmpty()) { -+ throw new SASTokenProviderException(String.format( -+ "At least one of the \"%s\" and \"%s\" must be set.", -+ FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, FS_AZURE_SAS_FIXED_TOKEN)); -+ } -+ -+ // Prefer Custom SASTokenProvider Implementation if configured. 
-+ if (customSasTokenProviderImplementation != null) { -+ LOG.trace("Using Custom SASTokenProvider implementation because it is given precedence when it is set."); -+ SASTokenProvider sasTokenProvider = ReflectionUtils.newInstance( -+ customSasTokenProviderImplementation, rawConfig); -+ if (sasTokenProvider == null) { -+ throw new SASTokenProviderException(String.format( -+ "Failed to initialize %s", customSasTokenProviderImplementation)); -+ } -+ LOG.trace("Initializing {}", customSasTokenProviderImplementation.getName()); -+ sasTokenProvider.initialize(rawConfig, accountName); -+ LOG.trace("{} init complete", customSasTokenProviderImplementation.getName()); -+ return sasTokenProvider; -+ } else { -+ LOG.trace("Using FixedSASTokenProvider implementation"); -+ FixedSASTokenProvider fixedSASTokenProvider = new FixedSASTokenProvider(configuredFixedToken); -+ return fixedSASTokenProvider; -+ } -+ } catch (SASTokenProviderException e) { -+ throw e; - } catch (Exception e) { -- throw new TokenAccessProviderException("Unable to load SAS token provider class: " + e, e); -+ throw new SASTokenProviderException( -+ "Unable to load SAS token provider class: " + e, e); - } - } - -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java -index 750306c4a9..955efb7b70 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java -@@ -1166,10 +1166,9 @@ public class AzureBlobFileSystem extends FileSystem - - /** - * Incrementing exists() calls from superclass for statistic collection. -- * - * @param f source path. - * @return true if the path exists. -- * @throws IOException -+ * @throws IOException if some issue in checking path. 
- */ - @Override - public boolean exists(Path f) throws IOException { -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java -index d86a3d9684..db50161885 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java -@@ -1559,7 +1559,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { - creds = new SharedKeyCredentials(accountName.substring(0, dotIndex), - abfsConfiguration.getStorageAccountKey()); - } else if (authType == AuthType.SAS) { -- LOG.trace("Fetching SAS token provider"); -+ LOG.trace("Fetching SAS Token Provider"); - sasTokenProvider = abfsConfiguration.getSASTokenProvider(); - } else { - LOG.trace("Fetching token provider"); -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java -index 12beb5a9bb..73ddfc303d 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java -@@ -227,7 +227,10 @@ public final class ConfigurationKeys { - public static final String FS_AZURE_ENABLE_DELEGATION_TOKEN = "fs.azure.enable.delegation.token"; - public static final String FS_AZURE_DELEGATION_TOKEN_PROVIDER_TYPE = "fs.azure.delegation.token.provider.type"; - -- /** Key for SAS token provider **/ -+ /** Key for fixed SAS token: {@value}. **/ -+ public static final String FS_AZURE_SAS_FIXED_TOKEN = "fs.azure.sas.fixed.token"; -+ -+ /** Key for SAS token provider: {@value}. **/ - public static final String FS_AZURE_SAS_TOKEN_PROVIDER_TYPE = "fs.azure.sas.token.provider.type"; - - /** For performance, AbfsInputStream/AbfsOutputStream re-use SAS tokens until the expiry is within this number of seconds. 
**/ -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java -index 69ef0d01c7..982013fef3 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java -@@ -620,6 +620,7 @@ public class AbfsClient implements Closeable { - abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, String.valueOf(retainUncommittedData)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); -+ - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, - abfsUriQueryBuilder, cachedSasToken); -@@ -701,6 +702,7 @@ public class AbfsClient implements Closeable { - requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); -+ - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, - abfsUriQueryBuilder, cachedSasToken); -@@ -995,12 +997,12 @@ public class AbfsClient implements Closeable { - sasToken = cachedSasToken; - LOG.trace("Using cached SAS token."); - } -+ - queryBuilder.setSASToken(sasToken); - LOG.trace("SAS token fetch complete for {} on {}", operation, path); - } catch (Exception ex) { -- throw new SASTokenProviderException(String.format("Failed to acquire a SAS token for %s on %s due to %s", -- operation, -- path, -+ throw new SASTokenProviderException(String.format( -+ "Failed to acquire a SAS token for %s on %s due to %s", operation, path, - ex.toString())); - } - } -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java -new file mode 100644 -index 0000000000..1a2614dcc1 ---- /dev/null -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java -@@ -0,0 +1,65 @@ -+/** -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. 
-+ */ -+ -+package org.apache.hadoop.fs.azurebfs.services; -+ -+import java.io.IOException; -+ -+import org.apache.hadoop.conf.Configuration; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; -+import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; -+ -+/** -+ * In house implementation of {@link SASTokenProvider} to use a fixed SAS token with ABFS. -+ * Use this to avoid implementing a Custom Token Provider just to return fixed SAS. -+ * Fixed SAS Token to be provided using the config "fs.azure.sas.fixed.token". -+ */ -+public class FixedSASTokenProvider implements SASTokenProvider { -+ private String fixedSASToken; -+ -+ public FixedSASTokenProvider(final String fixedSASToken) throws SASTokenProviderException { -+ this.fixedSASToken = fixedSASToken; -+ if (fixedSASToken == null || fixedSASToken.isEmpty()) { -+ throw new SASTokenProviderException( -+ String.format("Configured Fixed SAS Token is Invalid: %s", fixedSASToken)); -+ } -+ } -+ -+ @Override -+ public void initialize(final Configuration configuration, -+ final String accountName) -+ throws IOException { -+ } -+ -+ /** -+ * Returns the fixed SAS Token configured. -+ * @param account the name of the storage account. -+ * @param fileSystem the name of the fileSystem. -+ * @param path the file or directory path. -+ * @param operation the operation to be performed on the path. -+ * @return Fixed SAS Token -+ * @throws IOException never -+ */ -+ @Override -+ public String getSASToken(final String account, -+ final String fileSystem, -+ final String path, -+ final String operation) throws IOException { -+ return fixedSASToken; -+ } -+} -diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md -index dfb7f3f42a..9ea8903583 100644 ---- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md -+++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md -@@ -12,7 +12,7 @@ - limitations under the License. See accompanying LICENSE file. - --> - --# Hadoop Azure Support: ABFS — Azure Data Lake Storage Gen2 -+# Hadoop Azure Support: ABFS - Azure Data Lake Storage Gen2 - - - -@@ -309,12 +309,13 @@ in different deployment situations. - The ABFS client can be deployed in different ways, with its authentication needs - driven by them. - --1. With the storage account's authentication secret in the configuration: --"Shared Key". --1. Using OAuth 2.0 tokens of one form or another. --1. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, -- "Managed Instance". --1. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. -+1. With the storage account's authentication secret in the configuration: "Shared Key". -+2. Using OAuth 2.0 tokens of one form or another. -+3. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, "Managed Instance". -+4. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. -+5. By directly configuring a fixed Shared Access Signature (SAS) token in the account configuration settings files. -+ -+Note: SAS Based Authentication should be used only with HNS Enabled accounts. - - What can be changed is what secrets/credentials are used to authenticate the caller. - -@@ -355,14 +356,14 @@ the password, "key", retrieved from the XML/JCECKs configuration files. 
- - ```xml - -- fs.azure.account.auth.type.abfswales1.dfs.core.windows.net -+ fs.azure.account.auth.type.ACCOUNT_NAME.dfs.core.windows.net - SharedKey - - - - -- fs.azure.account.key.abfswales1.dfs.core.windows.net -- ZGlkIHlvdSByZWFsbHkgdGhpbmsgSSB3YXMgZ29pbmcgdG8gcHV0IGEga2V5IGluIGhlcmU/IA== -+ fs.azure.account.key.ACCOUNT_NAME.dfs.core.windows.net -+ ACCOUNT_KEY - - The secret password. Never share these. - -@@ -609,21 +610,119 @@ In case delegation token is enabled, and the config `fs.azure.delegation.token - - ### Shared Access Signature (SAS) Token Provider - --A Shared Access Signature (SAS) token provider supplies the ABFS connector with SAS --tokens by implementing the SASTokenProvider interface. -- --```xml -- -- fs.azure.account.auth.type -- SAS -- -- -- fs.azure.sas.token.provider.type -- {fully-qualified-class-name-for-implementation-of-SASTokenProvider-interface} -- --``` -- --The declared class must implement `org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. -+A shared access signature (SAS) provides secure delegated access to resources in -+your storage account. With a SAS, you have granular control over how a client can access your data. -+To know more about how SAS Authentication works refer to -+[Grant limited access to Azure Storage resources using shared access signatures (SAS)](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) -+ -+There are three types of SAS supported by Azure Storage: -+- [User Delegation SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas): Recommended for use with ABFS Driver with HNS Enabled ADLS Gen2 accounts. It is Identity based SAS that works at blob/directory level) -+- [Service SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas): Global and works at container level. -+- [Account SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas): Global and works at account level. -+ -+#### Known Issues With SAS -+- SAS Based Authentication works only with HNS Enabled ADLS Gen2 Accounts which -+is a recommended account type to be used with ABFS. -+- Certain root level operations are known to fail with SAS Based Authentication. -+ -+#### Using User Delegation SAS with ABFS -+ -+- **Description**: ABFS allows you to implement your custom SAS Token Provider -+that uses your identity to create a user delegation key which then can be used to -+create SAS instead of storage account key. The declared class must implement -+`org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. -+ -+- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: -+ 1. Authentication Type: -+ ```xml -+ -+ fs.azure.account.auth.type -+ SAS -+ -+ ``` -+ -+ 1. Custom SAS Token Provider Class: -+ ```xml -+ -+ fs.azure.sas.token.provider.type -+ CUSTOM_SAS_TOKEN_PROVIDER_CLASS -+ -+ ``` -+ -+ Replace `CUSTOM_SAS_TOKEN_PROVIDER_CLASS` with fully qualified class name of -+your custom token provider implementation. Depending upon the implementation you -+might need to specify additional configurations that are required by your custom -+implementation. 
-+ -+- **Example**: ABFS Hadoop Driver provides a [MockDelegationSASTokenProvider](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java) -+implementation that can be used as an example on how to implement your own custom -+SASTokenProvider. This requires the Application credentials to be specifed using -+the following configurations apart from above two: -+ -+ 1. App Service Principle Tenant Id: -+ ```xml -+ -+ fs.azure.test.app.service.principal.tenant.id -+ TENANT_ID -+ -+ ``` -+ 1. App Service Principle Object Id: -+ ```xml -+ -+ fs.azure.test.app.service.principal.object.id -+ OBJECT_ID -+ -+ ``` -+ 1. App Id: -+ ```xml -+ -+ fs.azure.test.app.id -+ APPLICATION_ID -+ -+ ``` -+ 1. App Secret: -+ ```xml -+ -+ fs.azure.test.app.secret -+ APPLICATION_SECRET -+ -+ ``` -+ -+- **Security**: More secure than Shared Key and allows granting limited access -+to data without exposing the access key. Recommended to be used only with HNS Enabled, -+ADLS Gen 2 storage accounts. -+ -+#### Using Account/Service SAS with ABFS -+ -+- **Description**: ABFS allows user to use Account/Service SAS for authenticating -+requests. User can specify them as fixed SAS Token to be used across all the requests. -+ -+- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: -+ -+ 1. Authentication Type: -+ ```xml -+ -+ fs.azure.account.auth.type -+ SAS -+ -+ ``` -+ -+ 1. Fixed SAS Token: -+ ```xml -+ -+ fs.azure.sas.fixed.token -+ FIXED_SAS_TOKEN -+ -+ ``` -+ -+ Replace `FIXED_SAS_TOKEN` with fixed Account/Service SAS. You can also -+generate SAS from Azure portal. Account -> Security + Networking -> Shared Access Signature -+ -+- **Security**: Account/Service SAS requires account keys to be used which makes -+them less secure. There is no scope of having delegated access to different users. -+ -+*Note:* When `fs.azure.sas.token.provider.type` and `fs.azure.fixed.sas.token` -+are both configured, precedence will be given to the custom token provider implementation. - - ## Technical notes - -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java -index fd2f2690da..3fe1c641cb 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java -@@ -265,12 +265,29 @@ public abstract class AbstractAbfsIntegrationTest extends - useConfiguredFileSystem = true; - } - -+ /** -+ * Create a filesystem for SAS tests using the SharedKey authentication. -+ * We do not allow filesystem creation with SAS because certain type of SAS do not have -+ * required permissions, and it is not known what type of SAS is configured by user. -+ * @throws Exception -+ */ - protected void createFilesystemForSASTests() throws Exception { -- // The SAS tests do not have permission to create a filesystem -- // so first create temporary instance of the filesystem using SharedKey -- // then re-use the filesystem it creates with SAS auth instead of SharedKey. -+ createFilesystemWithTestFileForSASTests(null); -+ } -+ -+ /** -+ * Create a filesystem for SAS tests along with a test file using SharedKey authentication. 
-+ * We do not allow filesystem creation with SAS because certain type of SAS do not have -+ * required permissions, and it is not known what type of SAS is configured by user. -+ * @param testPath path of the test file. -+ * @throws Exception -+ */ -+ protected void createFilesystemWithTestFileForSASTests(Path testPath) throws Exception { - AzureBlobFileSystem tempFs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig); - Assert.assertTrue(tempFs.exists(new Path("/"))); -+ if (testPath != null) { -+ tempFs.create(testPath).close(); -+ } - abfsConfig.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SAS.name()); - usingFilesystemForSASTests = true; - } -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java -new file mode 100644 -index 0000000000..d8db901151 ---- /dev/null -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java -@@ -0,0 +1,182 @@ -+/** -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+package org.apache.hadoop.fs.azurebfs; -+ -+import java.io.IOException; -+import java.nio.file.AccessDeniedException; -+ -+import org.assertj.core.api.Assertions; -+import org.junit.Assume; -+import org.junit.Test; -+ -+import org.apache.hadoop.fs.FileSystem; -+import org.apache.hadoop.fs.Path; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; -+import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider; -+import org.apache.hadoop.fs.azurebfs.services.AuthType; -+import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; -+import org.apache.hadoop.fs.azurebfs.utils.AccountSASGenerator; -+import org.apache.hadoop.fs.azurebfs.utils.Base64; -+ -+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_FIXED_TOKEN; -+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; -+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.accountProperty; -+import static org.apache.hadoop.test.LambdaTestUtils.intercept; -+ -+/** -+ * Tests to validate the choice between using a custom SASTokenProvider -+ * implementation and FixedSASTokenProvider. -+ */ -+public class ITestAzureBlobFileSystemChooseSAS extends AbstractAbfsIntegrationTest{ -+ -+ private String accountSAS = null; -+ private static final String TEST_PATH = "testPath"; -+ -+ /** -+ * To differentiate which SASTokenProvider was used we will use different type of SAS Tokens. 
-+ * FixedSASTokenProvider will return an Account SAS with only read permissions. -+ * SASTokenProvider will return a User Delegation SAS Token with both read and write permissions. -+= */ -+ public ITestAzureBlobFileSystemChooseSAS() throws Exception { -+ // SAS Token configured might not have permissions for creating file system. -+ // Shared Key must be configured to create one. Once created, a new instance -+ // of same file system will be used with SAS Authentication. -+ Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); -+ } -+ -+ @Override -+ public void setup() throws Exception { -+ createFilesystemWithTestFileForSASTests(new Path(TEST_PATH)); -+ super.setup(); -+ generateAccountSAS(); -+ } -+ -+ /** -+ * Generates an Account SAS Token using the Account Shared Key to be used as a fixed SAS Token. -+ * Account SAS used here will have only read permissions to resources. -+ * This will be used by individual tests to set in the configurations. -+ * @throws AzureBlobFileSystemException -+ */ -+ private void generateAccountSAS() throws AzureBlobFileSystemException { -+ final String accountKey = getConfiguration().getStorageAccountKey(); -+ AccountSASGenerator configAccountSASGenerator = new AccountSASGenerator(Base64.decode(accountKey)); -+ // Setting only read permissions. -+ configAccountSASGenerator.setPermissions("r"); -+ accountSAS = configAccountSASGenerator.getAccountSAS(getAccountName()); -+ } -+ -+ /** -+ * Tests the scenario where both the custom SASTokenProvider and a fixed SAS token are configured. -+ * Custom implementation of SASTokenProvider class should be chosen and User Delegation SAS should be used. -+ * @throws Exception -+ */ -+ @Test -+ public void testBothProviderFixedTokenConfigured() throws Exception { -+ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( -+ getRawConfiguration(), this.getAccountName()); -+ removeAnyPresetConfiguration(testAbfsConfig); -+ -+ // Configuring a SASTokenProvider class which provides a user delegation SAS. -+ testAbfsConfig.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, -+ MockDelegationSASTokenProvider.class.getName()); -+ -+ // configuring the Fixed SAS token which is an Account SAS. -+ testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); -+ -+ // Creating a new file system with updated configs. -+ try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) -+ FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { -+ -+ // Asserting that MockDelegationSASTokenProvider is used. -+ Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) -+ .describedAs("Custom SASTokenProvider Class must be used") -+ .isInstanceOf(MockDelegationSASTokenProvider.class); -+ -+ // Assert that User Delegation SAS is used and both read and write operations are permitted. -+ Path testPath = path(getMethodName()); -+ newTestFs.create(testPath).close(); -+ newTestFs.open(testPath).close(); -+ } -+ } -+ -+ /** -+ * Tests the scenario where only the fixed token is configured, and no token provider class is set. -+ * Account SAS Token configured as fixed SAS should be used. -+ * Also verifies that Account Specific as well as Account Agnostic Fixed SAS Token Works. -+ * @throws IOException -+ */ -+ @Test -+ public void testOnlyFixedTokenConfigured() throws Exception { -+ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( -+ getRawConfiguration(), this.getAccountName()); -+ -+ // setting an Account Specific Fixed SAS token. 
-+ removeAnyPresetConfiguration(testAbfsConfig); -+ testAbfsConfig.set(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName()), accountSAS); -+ testOnlyFixedTokenConfiguredInternal(testAbfsConfig); -+ -+ // setting an Account Agnostic Fixed SAS token. -+ removeAnyPresetConfiguration(testAbfsConfig); -+ testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); -+ testOnlyFixedTokenConfiguredInternal(testAbfsConfig); -+ } -+ -+ private void testOnlyFixedTokenConfiguredInternal(AbfsConfiguration testAbfsConfig) throws Exception { -+ // Creating a new filesystem with updated configs. -+ try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) -+ FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { -+ -+ // Asserting that FixedSASTokenProvider is used. -+ Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) -+ .describedAs("FixedSASTokenProvider Class must be used") -+ .isInstanceOf(FixedSASTokenProvider.class); -+ -+ // Assert that Account SAS is used and only read operations are permitted. -+ Path testPath = path(getMethodName()); -+ intercept(AccessDeniedException.class, () -> { -+ newTestFs.create(testPath); -+ }); -+ // Read Operation is permitted -+ newTestFs.getFileStatus(new Path(TEST_PATH)); -+ } -+ } -+ -+ /** -+ * Tests the scenario where both the token provider class and the fixed token are not configured. -+ * The code errors out at the initialization stage itself. -+ * @throws IOException -+ */ -+ @Test -+ public void testBothProviderFixedTokenUnset() throws Exception { -+ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( -+ getRawConfiguration(), this.getAccountName()); -+ removeAnyPresetConfiguration(testAbfsConfig); -+ -+ intercept(SASTokenProviderException.class, () -> { -+ FileSystem.newInstance(testAbfsConfig.getRawConfiguration()); -+ }); -+ } -+ -+ private void removeAnyPresetConfiguration(AbfsConfiguration testAbfsConfig) { -+ testAbfsConfig.unset(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE); -+ testAbfsConfig.unset(FS_AZURE_SAS_FIXED_TOKEN); -+ testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, this.getAccountName())); -+ testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName())); -+ } -+} -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java -index cf7d51da4c..d1e5dd4519 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java -@@ -40,7 +40,7 @@ import org.apache.hadoop.fs.azurebfs.utils.SASGenerator; - import org.apache.hadoop.security.AccessControlException; - - /** -- * A mock SAS token provider implementation -+ * A mock SAS token provider implementation. 
- */ - public class MockDelegationSASTokenProvider implements SASTokenProvider { - -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java -index 50ac20970f..3fda128a9c 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java -@@ -20,7 +20,11 @@ package org.apache.hadoop.fs.azurebfs.extensions; - - import java.io.IOException; - -+import org.slf4j.Logger; -+import org.slf4j.LoggerFactory; -+ - import org.apache.hadoop.conf.Configuration; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; - import org.apache.hadoop.security.AccessControlException; - - import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; -@@ -28,17 +32,25 @@ import org.apache.hadoop.fs.azurebfs.utils.Base64; - import org.apache.hadoop.fs.azurebfs.utils.ServiceSASGenerator; - - /** -- * A mock SAS token provider implementation -+ * A mock SAS token provider implementation. - */ - public class MockSASTokenProvider implements SASTokenProvider { - - private byte[] accountKey; - private ServiceSASGenerator generator; - private boolean skipAuthorizationForTestSetup = false; -+ private static final Logger LOG = LoggerFactory.getLogger(MockSASTokenProvider.class); - - // For testing we use a container SAS for all operations. - private String generateSAS(byte[] accountKey, String accountName, String fileSystemName) { -- return generator.getContainerSASWithFullControl(accountName, fileSystemName); -+ String containerSAS = ""; -+ try { -+ containerSAS = generator.getContainerSASWithFullControl(accountName, fileSystemName); -+ } catch (InvalidConfigurationValueException e) { -+ LOG.debug(e.getMessage()); -+ containerSAS = ""; -+ } -+ return containerSAS; - } - - @Override -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java -new file mode 100644 -index 0000000000..2af741b7a4 ---- /dev/null -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java -@@ -0,0 +1,103 @@ -+/** -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. 
-+ */ -+ -+package org.apache.hadoop.fs.azurebfs.utils; -+ -+import java.time.Instant; -+ -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -+import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; -+ -+/** -+ * Test Account SAS Generator. -+ * SAS generated by this will have only read access to storage account blob and file services. -+ */ -+public class AccountSASGenerator extends SASGenerator { -+ /** -+ * Creates Account SAS from Storage Account Key. -+ * https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas. -+ * @param accountKey: the storage account key. -+ */ -+ public AccountSASGenerator(byte[] accountKey) { -+ super(accountKey); -+ } -+ -+ private String permissions = "racwdl"; -+ -+ public String getAccountSAS(String accountName) throws -+ AzureBlobFileSystemException { -+ // retaining only the account name -+ accountName = getCanonicalAccountName(accountName); -+ String sp = permissions; -+ String sv = "2021-06-08"; -+ String srt = "sco"; -+ -+ String st = ISO_8601_FORMATTER.format(Instant.now().minus(FIVE_MINUTES)); -+ String se = ISO_8601_FORMATTER.format(Instant.now().plus(ONE_DAY)); -+ -+ String ss = "bf"; -+ String spr = "https"; -+ String signature = computeSignatureForSAS(sp, ss, srt, st, se, sv, accountName); -+ -+ AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); -+ qb.addQuery("sp", sp); -+ qb.addQuery("ss", ss); -+ qb.addQuery("srt", srt); -+ qb.addQuery("st", st); -+ qb.addQuery("se", se); -+ qb.addQuery("sv", sv); -+ qb.addQuery("sig", signature); -+ return qb.toString().substring(1); -+ } -+ -+ private String computeSignatureForSAS(String signedPerm, String signedService, String signedResType, -+ String signedStart, String signedExp, String signedVersion, String accountName) { -+ -+ StringBuilder sb = new StringBuilder(); -+ sb.append(accountName); -+ sb.append("\n"); -+ sb.append(signedPerm); -+ sb.append("\n"); -+ sb.append(signedService); -+ sb.append("\n"); -+ sb.append(signedResType); -+ sb.append("\n"); -+ sb.append(signedStart); -+ sb.append("\n"); -+ sb.append(signedExp); -+ sb.append("\n"); -+ sb.append("\n"); // signedIP -+ sb.append("\n"); // signedProtocol -+ sb.append(signedVersion); -+ sb.append("\n"); -+ sb.append("\n"); //signed encryption scope -+ -+ String stringToSign = sb.toString(); -+ LOG.debug("Account SAS stringToSign: " + stringToSign.replace("\n", ".")); -+ return computeHmac256(stringToSign); -+ } -+ -+ /** -+ * By default Account SAS has all the available permissions. Use this to -+ * override the default permissions and set as per the requirements. -+ * @param permissions -+ */ -+ public void setPermissions(final String permissions) { -+ this.permissions = permissions; -+ } -+} -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java -index 2e9289d8d4..a80ddac5ed 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java -@@ -29,6 +29,10 @@ import javax.crypto.spec.SecretKeySpec; - - import org.slf4j.Logger; - import org.slf4j.LoggerFactory; -+ -+import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; -+ - /** - * Test SAS generator. 
- */ -@@ -54,10 +58,8 @@ public abstract class SASGenerator { - protected static final Logger LOG = LoggerFactory.getLogger(SASGenerator.class); - public static final Duration FIVE_MINUTES = Duration.ofMinutes(5); - public static final Duration ONE_DAY = Duration.ofDays(1); -- public static final DateTimeFormatter ISO_8601_FORMATTER = -- DateTimeFormatter -- .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT) -- .withZone(ZoneId.of("UTC")); -+ public static final DateTimeFormatter ISO_8601_FORMATTER = DateTimeFormatter -+ .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT).withZone(ZoneId.of("UTC")); - - private Mac hmacSha256; - private byte[] key; -@@ -68,7 +70,7 @@ public abstract class SASGenerator { - - /** - * Called by subclasses to initialize the cryptographic SHA-256 HMAC provider. -- * @param key - a 256-bit secret key -+ * @param key - a 256-bit secret key. - */ - protected SASGenerator(byte[] key) { - this.key = key; -@@ -85,6 +87,26 @@ public abstract class SASGenerator { - } - } - -+ protected String getCanonicalAccountName(String accountName) throws -+ InvalidConfigurationValueException { -+ // returns the account name without the endpoint -+ // given account names with endpoint have the format accountname.endpoint -+ // For example, input of xyz.dfs.core.windows.net should return "xyz" only -+ int dotIndex = accountName.indexOf(AbfsHttpConstants.DOT); -+ if (dotIndex == 0) { -+ // case when accountname starts with a ".": endpoint is present, accountName is null -+ // for example .dfs.azure.com, which is invalid -+ throw new InvalidConfigurationValueException("Account Name is not fully qualified"); -+ } -+ if (dotIndex > 0) { -+ // case when endpoint is present with accountName -+ return accountName.substring(0, dotIndex); -+ } else { -+ // case when accountName is already canonicalized -+ return accountName; -+ } -+ } -+ - protected String computeHmac256(final String stringToSign) { - byte[] utf8Bytes; - try { -@@ -98,4 +120,4 @@ public abstract class SASGenerator { - } - return Base64.encode(hmac); - } --} -\ No newline at end of file -+} -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java -index 24a1cea255..0ae5239e8f 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java -@@ -20,23 +20,26 @@ package org.apache.hadoop.fs.azurebfs.utils; - - import java.time.Instant; - -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; - import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; - - /** -- * Test Service SAS generator. -+ * Test Service SAS Generator. - */ - public class ServiceSASGenerator extends SASGenerator { - - /** -- * Creates a SAS Generator for Service SAS -+ * Creates a SAS Generator for Service SAS. - * (https://docs.microsoft.com/en-us/rest/api/storageservices/create-service-sas). -- * @param accountKey - the storage account key -+ * @param accountKey - the storage account key. 
- */ - public ServiceSASGenerator(byte[] accountKey) { - super(accountKey); - } - -- public String getContainerSASWithFullControl(String accountName, String containerName) { -+ public String getContainerSASWithFullControl(String accountName, String containerName) throws -+ InvalidConfigurationValueException { -+ accountName = getCanonicalAccountName(accountName); - String sp = "rcwdl"; - String sv = AuthenticationVersion.Feb20.toString(); - String sr = "c"; -@@ -66,7 +69,7 @@ public class ServiceSASGenerator extends SASGenerator { - sb.append("\n"); - sb.append(se); - sb.append("\n"); -- // canonicalized resource -+ // canonicalize resource - sb.append("/blob/"); - sb.append(accountName); - sb.append("/"); -@@ -93,4 +96,4 @@ public class ServiceSASGenerator extends SASGenerator { - LOG.debug("Service SAS stringToSign: " + stringToSign.replace("\n", ".")); - return computeHmac256(stringToSign); - } --} -\ No newline at end of file -+} diff --git a/hadoop/stackable/patches/3.3.4/patchable.toml b/hadoop/stackable/patches/3.3.4/patchable.toml deleted file mode 100644 index a7fece048..000000000 --- a/hadoop/stackable/patches/3.3.4/patchable.toml +++ /dev/null @@ -1,2 +0,0 @@ -base = "a585a73c3e02ac62350c136643a5e7f6095a3dbb" -mirror = "https://github.com/stackabletech/hadoop.git" diff --git a/hadoop/stackable/patches/3.4.0/0001-YARN-11527-Update-node.js.patch b/hadoop/stackable/patches/3.4.0/0001-YARN-11527-Update-node.js.patch deleted file mode 100644 index 4d98247c7..000000000 --- a/hadoop/stackable/patches/3.4.0/0001-YARN-11527-Update-node.js.patch +++ /dev/null @@ -1,22 +0,0 @@ -From a3096eeaece059cebe553d188f81f6864a056bdc Mon Sep 17 00:00:00 2001 -From: Siegfried Weber -Date: Thu, 21 Dec 2023 13:51:13 +0100 -Subject: YARN-11527: Update node.js - ---- - hadoop-project/pom.xml | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml -index 0ed96d087b..6f3b9371cd 100644 ---- a/hadoop-project/pom.xml -+++ b/hadoop-project/pom.xml -@@ -217,7 +217,7 @@ - 1.0.2 - 5.4.0 - 9.31 -- v12.22.1 -+ v14.17.0 - v1.22.5 - 1.10.13 - 1.20 diff --git a/hadoop/stackable/patches/3.4.0/0002-Allow-overriding-datanode-registration-addresses.patch b/hadoop/stackable/patches/3.4.0/0002-Allow-overriding-datanode-registration-addresses.patch deleted file mode 100644 index 55eaf6723..000000000 --- a/hadoop/stackable/patches/3.4.0/0002-Allow-overriding-datanode-registration-addresses.patch +++ /dev/null @@ -1,259 +0,0 @@ -From 448b27ab25e4bf7f5aff97c256e9ebbe2d1ad181 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= -Date: Thu, 11 Jan 2024 14:01:02 +0100 -Subject: Allow overriding datanode registration addresses - ---- - .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ - .../blockmanagement/DatanodeManager.java | 43 +++++++----- - .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ - .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- - 4 files changed, 135 insertions(+), 22 deletions(-) - -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -index f92a2ad565..25bcd438c7 100755 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -@@ -152,6 +152,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { - 
public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; - public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; - public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; -+ -+ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; -+ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; -+ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; -+ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; -+ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; -+ - public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; - public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; - public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; -@@ -491,6 +498,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { - public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = - TimeUnit.SECONDS.toMillis(2); - -+ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; -+ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; - public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; - public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; - -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -index ebd2fa992e..c56f254478 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -@@ -181,6 +181,8 @@ public class DatanodeManager { - private boolean hasClusterEverBeenMultiRack = false; - - private final boolean checkIpHostnameInRegistration; -+ private final boolean allowRegistrationAddressOverride; -+ - /** - * Whether we should tell datanodes what to cache in replies to - * heartbeat messages. -@@ -314,6 +316,11 @@ public class DatanodeManager { - // Block invalidate limit also has some dependency on heartbeat interval. - // Check setBlockInvalidateLimit(). 
- setBlockInvalidateLimit(configuredBlockInvalidateLimit); -+ this.allowRegistrationAddressOverride = conf.getBoolean( -+ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, -+ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); -+ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY -+ + "=" + allowRegistrationAddressOverride); - this.checkIpHostnameInRegistration = conf.getBoolean( - DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, - DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); -@@ -1158,27 +1165,29 @@ public class DatanodeManager { - */ - public void registerDatanode(DatanodeRegistration nodeReg) - throws DisallowedDatanodeException, UnresolvedTopologyException { -- InetAddress dnAddress = Server.getRemoteIp(); -- if (dnAddress != null) { -- // Mostly called inside an RPC, update ip and peer hostname -- String hostname = dnAddress.getHostName(); -- String ip = dnAddress.getHostAddress(); -- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { -- // Reject registration of unresolved datanode to prevent performance -- // impact of repetitive DNS lookups later. -- final String message = "hostname cannot be resolved (ip=" -- + ip + ", hostname=" + hostname + ")"; -- LOG.warn("Unresolved datanode registration: " + message); -- throw new DisallowedDatanodeException(nodeReg, message); -+ if (!allowRegistrationAddressOverride) { -+ InetAddress dnAddress = Server.getRemoteIp(); -+ if (dnAddress != null) { -+ // Mostly called inside an RPC, update ip and peer hostname -+ String hostname = dnAddress.getHostName(); -+ String ip = dnAddress.getHostAddress(); -+ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { -+ // Reject registration of unresolved datanode to prevent performance -+ // impact of repetitive DNS lookups later. -+ final String message = "hostname cannot be resolved (ip=" -+ + ip + ", hostname=" + hostname + ")"; -+ LOG.warn("Unresolved datanode registration: " + message); -+ throw new DisallowedDatanodeException(nodeReg, message); -+ } -+ // update node registration with the ip and hostname from rpc request -+ nodeReg.setIpAddr(ip); -+ nodeReg.setPeerHostName(hostname); - } -- // update node registration with the ip and hostname from rpc request -- nodeReg.setIpAddr(ip); -- nodeReg.setPeerHostName(hostname); - } -- -+ - try { - nodeReg.setExportedKeys(blockManager.getBlockKeys()); -- -+ - // Checks if the node is not on the hosts list. If it is not, then - // it will be disallowed from registering. 
- if (!hostConfigManager.isIncluded(nodeReg)) { -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -index 21b92db307..5d3437239c 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -@@ -101,6 +101,11 @@ public class DNConf { - final boolean syncOnClose; - final boolean encryptDataTransfer; - final boolean connectToDnViaHostname; -+ private final String registeredHostname; -+ private final int registeredDataPort; -+ private final int registeredHttpPort; -+ private final int registeredHttpsPort; -+ private final int registeredIpcPort; - final boolean overwriteDownstreamDerivedQOP; - private final boolean pmemCacheRecoveryEnabled; - -@@ -189,6 +194,11 @@ public class DNConf { - connectToDnViaHostname = getConf().getBoolean( - DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, - DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); -+ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); -+ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); -+ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); -+ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); -+ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); - this.blockReportInterval = getConf().getLong( - DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, - DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); -@@ -363,6 +373,66 @@ public class DNConf { - return connectToDnViaHostname; - } - -+ /** -+ * Returns a hostname to register with the cluster instead of the system -+ * hostname. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected hostname. -+ * -+ * @return null if the system hostname should be used, otherwise a hostname -+ */ -+ public String getRegisteredHostname() { -+ return registeredHostname; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * data port that the node is listening on. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredDataPort() { -+ return registeredDataPort; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * HTTP port that the node is listening on. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredHttpPort() { -+ return registeredHttpPort; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * HTTPS port that the node is listening on. -+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredHttpsPort() { -+ return registeredHttpsPort; -+ } -+ -+ /** -+ * Returns a port number to register with the cluster instead of the -+ * IPC port that the node is listening on. 
-+ * This is an expert setting and can be used in multihoming scenarios to -+ * override the detected port. -+ * -+ * @return -1 if the actual port should be used, otherwise a port number -+ */ -+ public int getRegisteredIpcPort() { -+ return registeredIpcPort; -+ } -+ - /** - * Returns socket timeout - * -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -index 956f5bbe51..22ae127d98 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -@@ -135,6 +135,7 @@ import java.util.HashSet; - import java.util.Iterator; - import java.util.List; - import java.util.Map; -+import java.util.Optional; - import java.util.Map.Entry; - import java.util.Set; - import java.util.UUID; -@@ -2076,11 +2077,35 @@ public class DataNode extends ReconfigurableBase - NodeType.DATA_NODE); - } - -- DatanodeID dnId = new DatanodeID( -- streamingAddr.getAddress().getHostAddress(), hostName, -- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), -- infoSecurePort, getIpcPort()); -- return new DatanodeRegistration(dnId, storageInfo, -+ String registeredHostname = Optional -+ .ofNullable(dnConf.getRegisteredHostname()) -+ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); -+ int registeredDataPort = dnConf.getRegisteredDataPort(); -+ if (registeredDataPort == -1) { -+ registeredDataPort = getXferPort(); -+ } -+ int registeredHttpPort = dnConf.getRegisteredHttpPort(); -+ if (registeredHttpPort == -1) { -+ registeredHttpPort = getInfoPort(); -+ } -+ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); -+ if (registeredHttpsPort == -1) { -+ registeredHttpsPort = getInfoSecurePort(); -+ } -+ int registeredIpcPort = dnConf.getRegisteredIpcPort(); -+ if (registeredIpcPort == -1) { -+ registeredIpcPort = getIpcPort(); -+ } -+ -+ DatanodeID dnId = new DatanodeID(registeredHostname, -+ registeredHostname, -+ storage.getDatanodeUuid(), -+ registeredDataPort, -+ registeredHttpPort, -+ registeredHttpsPort, -+ registeredIpcPort); -+ -+ return new DatanodeRegistration(dnId, storageInfo, - new ExportedBlockKeys(), VersionInfo.getVersion()); - } - diff --git a/hadoop/stackable/patches/3.4.0/0003-Async-profiler-also-grab-itimer-events.patch b/hadoop/stackable/patches/3.4.0/0003-Async-profiler-also-grab-itimer-events.patch deleted file mode 100644 index 2bd82be74..000000000 --- a/hadoop/stackable/patches/3.4.0/0003-Async-profiler-also-grab-itimer-events.patch +++ /dev/null @@ -1,29 +0,0 @@ -From e0e0e15d1cb8e686c72bbc6699e0b4789f6e334d Mon Sep 17 00:00:00 2001 -From: Siegfried Weber -Date: Tue, 6 Feb 2024 16:10:54 +0100 -Subject: Async-profiler: also grab itimer events - ---- - .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -index ce53274151..909892ff90 100644 ---- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java -@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; - * Following event 
types are supported (default is 'cpu') (NOTE: not all OS'es support all events) - * // Perf events: - * // cpu -+ * // itimer - * // page-faults - * // context-switches - * // cycles -@@ -118,6 +119,7 @@ public class ProfileServlet extends HttpServlet { - private enum Event { - - CPU("cpu"), -+ ITIMER("itimer"), - ALLOC("alloc"), - LOCK("lock"), - PAGE_FAULTS("page-faults"), diff --git a/hadoop/stackable/patches/3.4.0/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch b/hadoop/stackable/patches/3.4.0/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch deleted file mode 100644 index d8382bc21..000000000 --- a/hadoop/stackable/patches/3.4.0/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch +++ /dev/null @@ -1,200 +0,0 @@ -From f6f99436ff36b8b56f3af105501a4b15ee4e8d44 Mon Sep 17 00:00:00 2001 -From: Sebastian Bernauer -Date: Thu, 15 Feb 2024 15:33:43 +0100 -Subject: HDFS-17378: Fix missing operationType for some operations in - authorizer - ---- - .../hdfs/server/namenode/FSNamesystem.java | 41 +++++++++++-------- - 1 file changed, 24 insertions(+), 17 deletions(-) - -diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java -index 277cc42222..75766bcd69 100644 ---- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java -+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java -@@ -2618,15 +2618,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - * @throws IOException - */ - BlockStoragePolicy getStoragePolicy(String src) throws IOException { -+ final String operationName = "getStoragePolicy"; - checkOperation(OperationCategory.READ); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - readLock(); - try { - checkOperation(OperationCategory.READ); - return FSDirAttrOp.getStoragePolicy(dir, pc, blockManager, src); - } finally { -- readUnlock("getStoragePolicy"); -+ readUnlock(operationName); - } - } - -@@ -2646,15 +2647,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - } - - long getPreferredBlockSize(String src) throws IOException { -+ final String operationName = "getPreferredBlockSize"; - checkOperation(OperationCategory.READ); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - readLock(); - try { - checkOperation(OperationCategory.READ); - return FSDirAttrOp.getPreferredBlockSize(dir, pc, src); - } finally { -- readUnlock("getPreferredBlockSize"); -+ readUnlock(operationName); - } - } - -@@ -2707,7 +2709,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - boolean createParent, short replication, long blockSize, - CryptoProtocolVersion[] supportedVersions, String ecPolicyName, - String storagePolicy, boolean logRetryCache) throws IOException { -- - HdfsFileStatus status; - try { - status = startFileInt(src, permissions, holder, clientMachine, flag, -@@ -2727,6 +2728,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - long blockSize, CryptoProtocolVersion[] supportedVersions, - String ecPolicyName, String storagePolicy, boolean logRetryCache) - throws IOException { -+ final String 
operationName = "create"; - if (NameNode.stateChangeLog.isDebugEnabled()) { - StringBuilder builder = new StringBuilder(); - builder.append("DIR* NameSystem.startFile: src=").append(src) -@@ -2764,7 +2766,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -2827,7 +2829,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - dir.writeUnlock(); - } - } finally { -- writeUnlock("create", getLockReportInfoSupplier(src, null, stat)); -+ writeUnlock(operationName, getLockReportInfoSupplier(src, null, stat)); - // There might be transactions logged while trying to recover the lease. - // They need to be sync'ed even when an exception was thrown. - if (!skipSync) { -@@ -2856,10 +2858,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - */ - boolean recoverLease(String src, String holder, String clientMachine) - throws IOException { -+ final String operationName = "recoverLease"; - boolean skipSync = false; - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -2880,7 +2883,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - skipSync = true; - throw se; - } finally { -- writeUnlock("recoverLease"); -+ writeUnlock(operationName); - // There might be transactions logged while trying to recover the lease. - // They need to be sync'ed even when an exception was thrown. 
- if (!skipSync) { -@@ -3096,6 +3099,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - final Set excludes, - final int numAdditionalNodes, final String clientName - ) throws IOException { -+ final String operationName = "getAdditionalDatanode"; - //check if the feature is enabled - dtpReplaceDatanodeOnFailure.checkEnabled(); - -@@ -3107,7 +3111,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - final BlockType blockType; - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - readLock(); - try { - // Changing this operation category to WRITE instead of making getAdditionalDatanode as a -@@ -3133,7 +3137,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", - src, fileId, blk, clientName, clientMachine)); - } finally { -- readUnlock("getAdditionalDatanode"); -+ readUnlock(operationName); - } - - if (clientnode == null) { -@@ -3155,10 +3159,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - */ - void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) - throws IOException { -+ final String operationName = "abandonBlock"; - NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -3167,7 +3172,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - NameNode.stateChangeLog.debug( - "BLOCK* NameSystem.abandonBlock: {} is removed from pendingCreates", b); - } finally { -- writeUnlock("abandonBlock"); -+ writeUnlock(operationName); - } - getEditLog().logSync(); - } -@@ -3221,10 +3226,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - boolean completeFile(final String src, String holder, - ExtendedBlock last, long fileId) - throws IOException { -+ final String operationName = "completeFile"; - boolean success = false; - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -3232,7 +3238,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, - fileId); - } finally { -- writeUnlock("completeFile"); -+ writeUnlock(operationName); - } - getEditLog().logSync(); - if (success) { -@@ -3666,10 +3672,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, - */ - void fsync(String src, long fileId, String clientName, long lastBlockLength) - throws IOException { -+ final String operationName = "fsync"; - NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); - checkOperation(OperationCategory.WRITE); - final FSPermissionChecker pc = getPermissionChecker(); -- FSPermissionChecker.setOperationType(null); -+ FSPermissionChecker.setOperationType(operationName); - writeLock(); - try { - checkOperation(OperationCategory.WRITE); -@@ -3683,7 +3690,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean, - } - FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); - } finally { -- writeUnlock("fsync"); -+ writeUnlock(operationName); - } - getEditLog().logSync(); - } diff --git a/hadoop/stackable/patches/3.4.0/0005-Update-CycloneDX-plugin.patch b/hadoop/stackable/patches/3.4.0/0005-Update-CycloneDX-plugin.patch deleted file mode 100644 index 57280c994..000000000 --- a/hadoop/stackable/patches/3.4.0/0005-Update-CycloneDX-plugin.patch +++ /dev/null @@ -1,44 +0,0 @@ -From c0f56826d39506f5edb1c4b7211f118de2324fca Mon Sep 17 00:00:00 2001 -From: Lukas Voetmand -Date: Fri, 6 Sep 2024 17:53:52 +0200 -Subject: Update CycloneDX plugin - ---- - pom.xml | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/pom.xml b/pom.xml -index 0a7a478ad9..3ceaf7a4ad 100644 ---- a/pom.xml -+++ b/pom.xml -@@ -118,7 +118,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - 4.2.0 - 1.1.1 - 3.10.1 -- 2.7.10 -+ 2.8.0 - - bash - -@@ -770,6 +770,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - org.cyclonedx - cyclonedx-maven-plugin - ${cyclonedx.version} -+ -+ application -+ 1.5 -+ false -+ - - - package -@@ -778,9 +783,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - - - -- -- xml -- - - - diff --git a/hadoop/stackable/patches/3.4.0/0006-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch b/hadoop/stackable/patches/3.4.0/0006-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch deleted file mode 100644 index 29d550ffb..000000000 --- a/hadoop/stackable/patches/3.4.0/0006-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch +++ /dev/null @@ -1,997 +0,0 @@ -From eaf09b92eeea2d52656529b38d778e2225d27e0e Mon Sep 17 00:00:00 2001 -From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> -Date: Fri, 7 Jun 2024 19:03:23 +0530 -Subject: HADOOP-18516: [ABFS][Authentication] Support Fixed SAS Token for ABFS - Authentication (#6552) - -Contributed by Anuj Modi ---- - .../hadoop/fs/azurebfs/AbfsConfiguration.java | 75 +++++--- - .../fs/azurebfs/AzureBlobFileSystem.java | 3 +- - .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- - .../azurebfs/constants/ConfigurationKeys.java | 5 +- - .../fs/azurebfs/services/AbfsClient.java | 9 +- - .../services/FixedSASTokenProvider.java | 65 +++++++ - .../hadoop-azure/src/site/markdown/abfs.md | 149 +++++++++++--- - .../azurebfs/AbstractAbfsIntegrationTest.java | 23 ++- - .../ITestAzureBlobFileSystemChooseSAS.java | 182 ++++++++++++++++++ - .../MockDelegationSASTokenProvider.java | 2 +- - .../extensions/MockSASTokenProvider.java | 16 +- - .../azurebfs/utils/AccountSASGenerator.java | 103 ++++++++++ - .../fs/azurebfs/utils/SASGenerator.java | 34 +++- - .../azurebfs/utils/ServiceSASGenerator.java | 15 +- - 14 files changed, 611 insertions(+), 72 deletions(-) - create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java - create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java - create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java - -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java -index eff8c08605..5c14a4af5c 100644 ---- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java -@@ -22,6 +22,7 @@ import java.io.IOException; - import java.lang.reflect.Field; - - import org.apache.hadoop.classification.VisibleForTesting; -+import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; - import org.apache.hadoop.util.Preconditions; - - import org.apache.commons.lang3.StringUtils; -@@ -941,33 +942,63 @@ public class AbfsConfiguration{ - } - } - -+ /** -+ * Returns the SASTokenProvider implementation to be used to generate SAS token.
-+ * Users can choose between a custom implementation of {@link SASTokenProvider} -+ * or an in house implementation {@link FixedSASTokenProvider}.
-+ * For Custom implementation "fs.azure.sas.token.provider.type" needs to be provided.
-+ * For Fixed SAS Token use "fs.azure.sas.fixed.token" needs to be provided.
-+ * In case both are provided, Preference will be given to Custom implementation.
-+ * Avoid using a custom tokenProvider implementation just to read the configured -+ * fixed token, as this could create confusion. Also,implementing the SASTokenProvider -+ * requires relying on the raw configurations. It is more stable to depend on -+ * the AbfsConfiguration with which a filesystem is initialized, and eliminate -+ * chances of dynamic modifications and spurious situations.
-+ * @return sasTokenProvider object based on configurations provided -+ * @throws AzureBlobFileSystemException -+ */ - public SASTokenProvider getSASTokenProvider() throws AzureBlobFileSystemException { - AuthType authType = getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey); - if (authType != AuthType.SAS) { - throw new SASTokenProviderException(String.format( -- "Invalid auth type: %s is being used, expecting SAS", authType)); -+ "Invalid auth type: %s is being used, expecting SAS.", authType)); - } - - try { -- String configKey = FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; -- Class sasTokenProviderClass = -- getTokenProviderClass(authType, configKey, null, -- SASTokenProvider.class); -- -- Preconditions.checkArgument(sasTokenProviderClass != null, -- String.format("The configuration value for \"%s\" is invalid.", configKey)); -- -- SASTokenProvider sasTokenProvider = ReflectionUtils -- .newInstance(sasTokenProviderClass, rawConfig); -- Preconditions.checkArgument(sasTokenProvider != null, -- String.format("Failed to initialize %s", sasTokenProviderClass)); -- -- LOG.trace("Initializing {}", sasTokenProviderClass.getName()); -- sasTokenProvider.initialize(rawConfig, accountName); -- LOG.trace("{} init complete", sasTokenProviderClass.getName()); -- return sasTokenProvider; -+ Class customSasTokenProviderImplementation = -+ getTokenProviderClass(authType, FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, -+ null, SASTokenProvider.class); -+ String configuredFixedToken = this.getTrimmedPasswordString(FS_AZURE_SAS_FIXED_TOKEN, EMPTY_STRING); -+ -+ if (customSasTokenProviderImplementation == null && configuredFixedToken.isEmpty()) { -+ throw new SASTokenProviderException(String.format( -+ "At least one of the \"%s\" and \"%s\" must be set.", -+ FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, FS_AZURE_SAS_FIXED_TOKEN)); -+ } -+ -+ // Prefer Custom SASTokenProvider Implementation if configured. 
-+ if (customSasTokenProviderImplementation != null) { -+ LOG.trace("Using Custom SASTokenProvider implementation because it is given precedence when it is set."); -+ SASTokenProvider sasTokenProvider = ReflectionUtils.newInstance( -+ customSasTokenProviderImplementation, rawConfig); -+ if (sasTokenProvider == null) { -+ throw new SASTokenProviderException(String.format( -+ "Failed to initialize %s", customSasTokenProviderImplementation)); -+ } -+ LOG.trace("Initializing {}", customSasTokenProviderImplementation.getName()); -+ sasTokenProvider.initialize(rawConfig, accountName); -+ LOG.trace("{} init complete", customSasTokenProviderImplementation.getName()); -+ return sasTokenProvider; -+ } else { -+ LOG.trace("Using FixedSASTokenProvider implementation"); -+ FixedSASTokenProvider fixedSASTokenProvider = new FixedSASTokenProvider(configuredFixedToken); -+ return fixedSASTokenProvider; -+ } -+ } catch (SASTokenProviderException e) { -+ throw e; - } catch (Exception e) { -- throw new TokenAccessProviderException("Unable to load SAS token provider class: " + e, e); -+ throw new SASTokenProviderException( -+ "Unable to load SAS token provider class: " + e, e); - } - } - -@@ -980,14 +1011,14 @@ public class AbfsConfiguration{ - Class encryptionContextClass = - getAccountSpecificClass(configKey, null, - EncryptionContextProvider.class); -- Preconditions.checkArgument(encryptionContextClass != null, String.format( -+ Preconditions.checkArgument(encryptionContextClass != null, - "The configuration value for %s is invalid, or config key is not account-specific", -- configKey)); -+ configKey); - - EncryptionContextProvider encryptionContextProvider = - ReflectionUtils.newInstance(encryptionContextClass, rawConfig); - Preconditions.checkArgument(encryptionContextProvider != null, -- String.format("Failed to initialize %s", encryptionContextClass)); -+ "Failed to initialize %s", encryptionContextClass); - - LOG.trace("{} init complete", encryptionContextClass.getName()); - return encryptionContextProvider; -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java -index b234f76d5d..0b6ed90658 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java -@@ -1308,10 +1308,9 @@ public class AzureBlobFileSystem extends FileSystem - - /** - * Incrementing exists() calls from superclass for statistic collection. -- * - * @param f source path. - * @return true if the path exists. -- * @throws IOException -+ * @throws IOException if some issue in checking path. 
- */ - @Override - public boolean exists(Path f) throws IOException { -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java -index d9693dd7e1..dc4e585a7b 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java -@@ -1728,7 +1728,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { - creds = new SharedKeyCredentials(accountName.substring(0, dotIndex), - abfsConfiguration.getStorageAccountKey()); - } else if (authType == AuthType.SAS) { -- LOG.trace("Fetching SAS token provider"); -+ LOG.trace("Fetching SAS Token Provider"); - sasTokenProvider = abfsConfiguration.getSASTokenProvider(); - } else { - LOG.trace("Fetching token provider"); -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java -index a27c757026..b85e8c31d5 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java -@@ -269,7 +269,10 @@ public final class ConfigurationKeys { - public static final String FS_AZURE_ENABLE_DELEGATION_TOKEN = "fs.azure.enable.delegation.token"; - public static final String FS_AZURE_DELEGATION_TOKEN_PROVIDER_TYPE = "fs.azure.delegation.token.provider.type"; - -- /** Key for SAS token provider **/ -+ /** Key for fixed SAS token: {@value}. **/ -+ public static final String FS_AZURE_SAS_FIXED_TOKEN = "fs.azure.sas.fixed.token"; -+ -+ /** Key for SAS token provider: {@value}. **/ - public static final String FS_AZURE_SAS_TOKEN_PROVIDER_TYPE = "fs.azure.sas.token.provider.type"; - - /** For performance, AbfsInputStream/AbfsOutputStream re-use SAS tokens until the expiry is within this number of seconds. 
**/ -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java -index 8eeb548f50..a013af0a35 100644 ---- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java -@@ -945,6 +945,7 @@ public class AbfsClient implements Closeable { - abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, String.valueOf(retainUncommittedData)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); -+ - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, - abfsUriQueryBuilder, cachedSasToken); -@@ -1035,6 +1036,7 @@ public class AbfsClient implements Closeable { - requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); -+ - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, - abfsUriQueryBuilder, cachedSasToken); -@@ -1325,16 +1327,17 @@ public class AbfsClient implements Closeable { - sasToken = cachedSasToken; - LOG.trace("Using cached SAS token."); - } -+ - // if SAS Token contains a prefix of ?, it should be removed - if (sasToken.charAt(0) == '?') { - sasToken = sasToken.substring(1); - } -+ - queryBuilder.setSASToken(sasToken); - LOG.trace("SAS token fetch complete for {} on {}", operation, path); - } catch (Exception ex) { -- throw new SASTokenProviderException(String.format("Failed to acquire a SAS token for %s on %s due to %s", -- operation, -- path, -+ throw new SASTokenProviderException(String.format( -+ "Failed to acquire a SAS token for %s on %s due to %s", operation, path, - ex.toString())); - } - } -diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java -new file mode 100644 -index 0000000000..1a2614dcc1 ---- /dev/null -+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java -@@ -0,0 +1,65 @@ -+/** -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. 
-+ */ -+ -+package org.apache.hadoop.fs.azurebfs.services; -+ -+import java.io.IOException; -+ -+import org.apache.hadoop.conf.Configuration; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; -+import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; -+ -+/** -+ * In house implementation of {@link SASTokenProvider} to use a fixed SAS token with ABFS. -+ * Use this to avoid implementing a Custom Token Provider just to return fixed SAS. -+ * Fixed SAS Token to be provided using the config "fs.azure.sas.fixed.token". -+ */ -+public class FixedSASTokenProvider implements SASTokenProvider { -+ private String fixedSASToken; -+ -+ public FixedSASTokenProvider(final String fixedSASToken) throws SASTokenProviderException { -+ this.fixedSASToken = fixedSASToken; -+ if (fixedSASToken == null || fixedSASToken.isEmpty()) { -+ throw new SASTokenProviderException( -+ String.format("Configured Fixed SAS Token is Invalid: %s", fixedSASToken)); -+ } -+ } -+ -+ @Override -+ public void initialize(final Configuration configuration, -+ final String accountName) -+ throws IOException { -+ } -+ -+ /** -+ * Returns the fixed SAS Token configured. -+ * @param account the name of the storage account. -+ * @param fileSystem the name of the fileSystem. -+ * @param path the file or directory path. -+ * @param operation the operation to be performed on the path. -+ * @return Fixed SAS Token -+ * @throws IOException never -+ */ -+ @Override -+ public String getSASToken(final String account, -+ final String fileSystem, -+ final String path, -+ final String operation) throws IOException { -+ return fixedSASToken; -+ } -+} -diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md -index 9021f3e3b1..78094b3813 100644 ---- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md -+++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md -@@ -12,7 +12,7 @@ - limitations under the License. See accompanying LICENSE file. - --> - --# Hadoop Azure Support: ABFS — Azure Data Lake Storage Gen2 -+# Hadoop Azure Support: ABFS - Azure Data Lake Storage Gen2 - - - -@@ -309,12 +309,13 @@ in different deployment situations. - The ABFS client can be deployed in different ways, with its authentication needs - driven by them. - --1. With the storage account's authentication secret in the configuration: --"Shared Key". --1. Using OAuth 2.0 tokens of one form or another. --1. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, -- "Managed Instance". --1. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. -+1. With the storage account's authentication secret in the configuration: "Shared Key". -+2. Using OAuth 2.0 tokens of one form or another. -+3. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, "Managed Instance". -+4. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. -+5. By directly configuring a fixed Shared Access Signature (SAS) token in the account configuration settings files. -+ -+Note: SAS Based Authentication should be used only with HNS Enabled accounts. - - What can be changed is what secrets/credentials are used to authenticate the caller. - -@@ -355,14 +356,14 @@ the password, "key", retrieved from the XML/JCECKs configuration files. 
- - ```xml - -- fs.azure.account.auth.type.abfswales1.dfs.core.windows.net -+ fs.azure.account.auth.type.ACCOUNT_NAME.dfs.core.windows.net - SharedKey - - - - -- fs.azure.account.key.abfswales1.dfs.core.windows.net -- ZGlkIHlvdSByZWFsbHkgdGhpbmsgSSB3YXMgZ29pbmcgdG8gcHV0IGEga2V5IGluIGhlcmU/IA== -+ fs.azure.account.key.ACCOUNT_NAME.dfs.core.windows.net -+ ACCOUNT_KEY - - The secret password. Never share these. - -@@ -609,21 +610,119 @@ In case delegation token is enabled, and the config `fs.azure.delegation.token - - ### Shared Access Signature (SAS) Token Provider - --A Shared Access Signature (SAS) token provider supplies the ABFS connector with SAS --tokens by implementing the SASTokenProvider interface. -- --```xml -- -- fs.azure.account.auth.type -- SAS -- -- -- fs.azure.sas.token.provider.type -- {fully-qualified-class-name-for-implementation-of-SASTokenProvider-interface} -- --``` -- --The declared class must implement `org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. -+A shared access signature (SAS) provides secure delegated access to resources in -+your storage account. With a SAS, you have granular control over how a client can access your data. -+To know more about how SAS Authentication works refer to -+[Grant limited access to Azure Storage resources using shared access signatures (SAS)](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) -+ -+There are three types of SAS supported by Azure Storage: -+- [User Delegation SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas): Recommended for use with ABFS Driver with HNS Enabled ADLS Gen2 accounts. It is Identity based SAS that works at blob/directory level) -+- [Service SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas): Global and works at container level. -+- [Account SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas): Global and works at account level. -+ -+#### Known Issues With SAS -+- SAS Based Authentication works only with HNS Enabled ADLS Gen2 Accounts which -+is a recommended account type to be used with ABFS. -+- Certain root level operations are known to fail with SAS Based Authentication. -+ -+#### Using User Delegation SAS with ABFS -+ -+- **Description**: ABFS allows you to implement your custom SAS Token Provider -+that uses your identity to create a user delegation key which then can be used to -+create SAS instead of storage account key. The declared class must implement -+`org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. -+ -+- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: -+ 1. Authentication Type: -+ ```xml -+ -+ fs.azure.account.auth.type -+ SAS -+ -+ ``` -+ -+ 1. Custom SAS Token Provider Class: -+ ```xml -+ -+ fs.azure.sas.token.provider.type -+ CUSTOM_SAS_TOKEN_PROVIDER_CLASS -+ -+ ``` -+ -+ Replace `CUSTOM_SAS_TOKEN_PROVIDER_CLASS` with fully qualified class name of -+your custom token provider implementation. Depending upon the implementation you -+might need to specify additional configurations that are required by your custom -+implementation. 
-+ -+- **Example**: ABFS Hadoop Driver provides a [MockDelegationSASTokenProvider](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java) -+implementation that can be used as an example on how to implement your own custom -+SASTokenProvider. This requires the Application credentials to be specifed using -+the following configurations apart from above two: -+ -+ 1. App Service Principle Tenant Id: -+ ```xml -+ -+ fs.azure.test.app.service.principal.tenant.id -+ TENANT_ID -+ -+ ``` -+ 1. App Service Principle Object Id: -+ ```xml -+ -+ fs.azure.test.app.service.principal.object.id -+ OBJECT_ID -+ -+ ``` -+ 1. App Id: -+ ```xml -+ -+ fs.azure.test.app.id -+ APPLICATION_ID -+ -+ ``` -+ 1. App Secret: -+ ```xml -+ -+ fs.azure.test.app.secret -+ APPLICATION_SECRET -+ -+ ``` -+ -+- **Security**: More secure than Shared Key and allows granting limited access -+to data without exposing the access key. Recommended to be used only with HNS Enabled, -+ADLS Gen 2 storage accounts. -+ -+#### Using Account/Service SAS with ABFS -+ -+- **Description**: ABFS allows user to use Account/Service SAS for authenticating -+requests. User can specify them as fixed SAS Token to be used across all the requests. -+ -+- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: -+ -+ 1. Authentication Type: -+ ```xml -+ -+ fs.azure.account.auth.type -+ SAS -+ -+ ``` -+ -+ 1. Fixed SAS Token: -+ ```xml -+ -+ fs.azure.sas.fixed.token -+ FIXED_SAS_TOKEN -+ -+ ``` -+ -+ Replace `FIXED_SAS_TOKEN` with fixed Account/Service SAS. You can also -+generate SAS from Azure portal. Account -> Security + Networking -> Shared Access Signature -+ -+- **Security**: Account/Service SAS requires account keys to be used which makes -+them less secure. There is no scope of having delegated access to different users. -+ -+*Note:* When `fs.azure.sas.token.provider.type` and `fs.azure.fixed.sas.token` -+are both configured, precedence will be given to the custom token provider implementation. - - ## Technical notes - -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java -index 66a1b22da9..c1750c848c 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java -@@ -284,13 +284,30 @@ public abstract class AbstractAbfsIntegrationTest extends - useConfiguredFileSystem = true; - } - -+ /** -+ * Create a filesystem for SAS tests using the SharedKey authentication. -+ * We do not allow filesystem creation with SAS because certain type of SAS do not have -+ * required permissions, and it is not known what type of SAS is configured by user. -+ * @throws Exception -+ */ - protected void createFilesystemForSASTests() throws Exception { -- // The SAS tests do not have permission to create a filesystem -- // so first create temporary instance of the filesystem using SharedKey -- // then re-use the filesystem it creates with SAS auth instead of SharedKey. -+ createFilesystemWithTestFileForSASTests(null); -+ } -+ -+ /** -+ * Create a filesystem for SAS tests along with a test file using SharedKey authentication. 
-+ * We do not allow filesystem creation with SAS because certain type of SAS do not have -+ * required permissions, and it is not known what type of SAS is configured by user. -+ * @param testPath path of the test file. -+ * @throws Exception -+ */ -+ protected void createFilesystemWithTestFileForSASTests(Path testPath) throws Exception { - try (AzureBlobFileSystem tempFs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig)){ - ContractTestUtils.assertPathExists(tempFs, "This path should exist", - new Path("/")); -+ if (testPath != null) { -+ tempFs.create(testPath).close(); -+ } - abfsConfig.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SAS.name()); - usingFilesystemForSASTests = true; - } -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java -new file mode 100644 -index 0000000000..d8db901151 ---- /dev/null -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java -@@ -0,0 +1,182 @@ -+/** -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+package org.apache.hadoop.fs.azurebfs; -+ -+import java.io.IOException; -+import java.nio.file.AccessDeniedException; -+ -+import org.assertj.core.api.Assertions; -+import org.junit.Assume; -+import org.junit.Test; -+ -+import org.apache.hadoop.fs.FileSystem; -+import org.apache.hadoop.fs.Path; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; -+import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider; -+import org.apache.hadoop.fs.azurebfs.services.AuthType; -+import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; -+import org.apache.hadoop.fs.azurebfs.utils.AccountSASGenerator; -+import org.apache.hadoop.fs.azurebfs.utils.Base64; -+ -+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_FIXED_TOKEN; -+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; -+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.accountProperty; -+import static org.apache.hadoop.test.LambdaTestUtils.intercept; -+ -+/** -+ * Tests to validate the choice between using a custom SASTokenProvider -+ * implementation and FixedSASTokenProvider. 
-+ */ -+public class ITestAzureBlobFileSystemChooseSAS extends AbstractAbfsIntegrationTest{ -+ -+ private String accountSAS = null; -+ private static final String TEST_PATH = "testPath"; -+ -+ /** -+ * To differentiate which SASTokenProvider was used we will use different type of SAS Tokens. -+ * FixedSASTokenProvider will return an Account SAS with only read permissions. -+ * SASTokenProvider will return a User Delegation SAS Token with both read and write permissions. -+= */ -+ public ITestAzureBlobFileSystemChooseSAS() throws Exception { -+ // SAS Token configured might not have permissions for creating file system. -+ // Shared Key must be configured to create one. Once created, a new instance -+ // of same file system will be used with SAS Authentication. -+ Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); -+ } -+ -+ @Override -+ public void setup() throws Exception { -+ createFilesystemWithTestFileForSASTests(new Path(TEST_PATH)); -+ super.setup(); -+ generateAccountSAS(); -+ } -+ -+ /** -+ * Generates an Account SAS Token using the Account Shared Key to be used as a fixed SAS Token. -+ * Account SAS used here will have only read permissions to resources. -+ * This will be used by individual tests to set in the configurations. -+ * @throws AzureBlobFileSystemException -+ */ -+ private void generateAccountSAS() throws AzureBlobFileSystemException { -+ final String accountKey = getConfiguration().getStorageAccountKey(); -+ AccountSASGenerator configAccountSASGenerator = new AccountSASGenerator(Base64.decode(accountKey)); -+ // Setting only read permissions. -+ configAccountSASGenerator.setPermissions("r"); -+ accountSAS = configAccountSASGenerator.getAccountSAS(getAccountName()); -+ } -+ -+ /** -+ * Tests the scenario where both the custom SASTokenProvider and a fixed SAS token are configured. -+ * Custom implementation of SASTokenProvider class should be chosen and User Delegation SAS should be used. -+ * @throws Exception -+ */ -+ @Test -+ public void testBothProviderFixedTokenConfigured() throws Exception { -+ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( -+ getRawConfiguration(), this.getAccountName()); -+ removeAnyPresetConfiguration(testAbfsConfig); -+ -+ // Configuring a SASTokenProvider class which provides a user delegation SAS. -+ testAbfsConfig.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, -+ MockDelegationSASTokenProvider.class.getName()); -+ -+ // configuring the Fixed SAS token which is an Account SAS. -+ testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); -+ -+ // Creating a new file system with updated configs. -+ try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) -+ FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { -+ -+ // Asserting that MockDelegationSASTokenProvider is used. -+ Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) -+ .describedAs("Custom SASTokenProvider Class must be used") -+ .isInstanceOf(MockDelegationSASTokenProvider.class); -+ -+ // Assert that User Delegation SAS is used and both read and write operations are permitted. -+ Path testPath = path(getMethodName()); -+ newTestFs.create(testPath).close(); -+ newTestFs.open(testPath).close(); -+ } -+ } -+ -+ /** -+ * Tests the scenario where only the fixed token is configured, and no token provider class is set. -+ * Account SAS Token configured as fixed SAS should be used. -+ * Also verifies that Account Specific as well as Account Agnostic Fixed SAS Token Works. 
-+ * @throws IOException -+ */ -+ @Test -+ public void testOnlyFixedTokenConfigured() throws Exception { -+ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( -+ getRawConfiguration(), this.getAccountName()); -+ -+ // setting an Account Specific Fixed SAS token. -+ removeAnyPresetConfiguration(testAbfsConfig); -+ testAbfsConfig.set(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName()), accountSAS); -+ testOnlyFixedTokenConfiguredInternal(testAbfsConfig); -+ -+ // setting an Account Agnostic Fixed SAS token. -+ removeAnyPresetConfiguration(testAbfsConfig); -+ testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); -+ testOnlyFixedTokenConfiguredInternal(testAbfsConfig); -+ } -+ -+ private void testOnlyFixedTokenConfiguredInternal(AbfsConfiguration testAbfsConfig) throws Exception { -+ // Creating a new filesystem with updated configs. -+ try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) -+ FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { -+ -+ // Asserting that FixedSASTokenProvider is used. -+ Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) -+ .describedAs("FixedSASTokenProvider Class must be used") -+ .isInstanceOf(FixedSASTokenProvider.class); -+ -+ // Assert that Account SAS is used and only read operations are permitted. -+ Path testPath = path(getMethodName()); -+ intercept(AccessDeniedException.class, () -> { -+ newTestFs.create(testPath); -+ }); -+ // Read Operation is permitted -+ newTestFs.getFileStatus(new Path(TEST_PATH)); -+ } -+ } -+ -+ /** -+ * Tests the scenario where both the token provider class and the fixed token are not configured. -+ * The code errors out at the initialization stage itself. -+ * @throws IOException -+ */ -+ @Test -+ public void testBothProviderFixedTokenUnset() throws Exception { -+ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( -+ getRawConfiguration(), this.getAccountName()); -+ removeAnyPresetConfiguration(testAbfsConfig); -+ -+ intercept(SASTokenProviderException.class, () -> { -+ FileSystem.newInstance(testAbfsConfig.getRawConfiguration()); -+ }); -+ } -+ -+ private void removeAnyPresetConfiguration(AbfsConfiguration testAbfsConfig) { -+ testAbfsConfig.unset(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE); -+ testAbfsConfig.unset(FS_AZURE_SAS_FIXED_TOKEN); -+ testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, this.getAccountName())); -+ testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName())); -+ } -+} -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java -index cf7d51da4c..d1e5dd4519 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java -@@ -40,7 +40,7 @@ import org.apache.hadoop.fs.azurebfs.utils.SASGenerator; - import org.apache.hadoop.security.AccessControlException; - - /** -- * A mock SAS token provider implementation -+ * A mock SAS token provider implementation. 
- */ - public class MockDelegationSASTokenProvider implements SASTokenProvider { - -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java -index 50ac20970f..3fda128a9c 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java -@@ -20,7 +20,11 @@ package org.apache.hadoop.fs.azurebfs.extensions; - - import java.io.IOException; - -+import org.slf4j.Logger; -+import org.slf4j.LoggerFactory; -+ - import org.apache.hadoop.conf.Configuration; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; - import org.apache.hadoop.security.AccessControlException; - - import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; -@@ -28,17 +32,25 @@ import org.apache.hadoop.fs.azurebfs.utils.Base64; - import org.apache.hadoop.fs.azurebfs.utils.ServiceSASGenerator; - - /** -- * A mock SAS token provider implementation -+ * A mock SAS token provider implementation. - */ - public class MockSASTokenProvider implements SASTokenProvider { - - private byte[] accountKey; - private ServiceSASGenerator generator; - private boolean skipAuthorizationForTestSetup = false; -+ private static final Logger LOG = LoggerFactory.getLogger(MockSASTokenProvider.class); - - // For testing we use a container SAS for all operations. - private String generateSAS(byte[] accountKey, String accountName, String fileSystemName) { -- return generator.getContainerSASWithFullControl(accountName, fileSystemName); -+ String containerSAS = ""; -+ try { -+ containerSAS = generator.getContainerSASWithFullControl(accountName, fileSystemName); -+ } catch (InvalidConfigurationValueException e) { -+ LOG.debug(e.getMessage()); -+ containerSAS = ""; -+ } -+ return containerSAS; - } - - @Override -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java -new file mode 100644 -index 0000000000..2af741b7a4 ---- /dev/null -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java -@@ -0,0 +1,103 @@ -+/** -+ * Licensed to the Apache Software Foundation (ASF) under one -+ * or more contributor license agreements. See the NOTICE file -+ * distributed with this work for additional information -+ * regarding copyright ownership. The ASF licenses this file -+ * to you under the Apache License, Version 2.0 (the -+ * "License"); you may not use this file except in compliance -+ * with the License. You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+ * See the License for the specific language governing permissions and -+ * limitations under the License. 
-+ */ -+ -+package org.apache.hadoop.fs.azurebfs.utils; -+ -+import java.time.Instant; -+ -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -+import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; -+ -+/** -+ * Test Account SAS Generator. -+ * SAS generated by this will have only read access to storage account blob and file services. -+ */ -+public class AccountSASGenerator extends SASGenerator { -+ /** -+ * Creates Account SAS from Storage Account Key. -+ * https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas. -+ * @param accountKey: the storage account key. -+ */ -+ public AccountSASGenerator(byte[] accountKey) { -+ super(accountKey); -+ } -+ -+ private String permissions = "racwdl"; -+ -+ public String getAccountSAS(String accountName) throws -+ AzureBlobFileSystemException { -+ // retaining only the account name -+ accountName = getCanonicalAccountName(accountName); -+ String sp = permissions; -+ String sv = "2021-06-08"; -+ String srt = "sco"; -+ -+ String st = ISO_8601_FORMATTER.format(Instant.now().minus(FIVE_MINUTES)); -+ String se = ISO_8601_FORMATTER.format(Instant.now().plus(ONE_DAY)); -+ -+ String ss = "bf"; -+ String spr = "https"; -+ String signature = computeSignatureForSAS(sp, ss, srt, st, se, sv, accountName); -+ -+ AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); -+ qb.addQuery("sp", sp); -+ qb.addQuery("ss", ss); -+ qb.addQuery("srt", srt); -+ qb.addQuery("st", st); -+ qb.addQuery("se", se); -+ qb.addQuery("sv", sv); -+ qb.addQuery("sig", signature); -+ return qb.toString().substring(1); -+ } -+ -+ private String computeSignatureForSAS(String signedPerm, String signedService, String signedResType, -+ String signedStart, String signedExp, String signedVersion, String accountName) { -+ -+ StringBuilder sb = new StringBuilder(); -+ sb.append(accountName); -+ sb.append("\n"); -+ sb.append(signedPerm); -+ sb.append("\n"); -+ sb.append(signedService); -+ sb.append("\n"); -+ sb.append(signedResType); -+ sb.append("\n"); -+ sb.append(signedStart); -+ sb.append("\n"); -+ sb.append(signedExp); -+ sb.append("\n"); -+ sb.append("\n"); // signedIP -+ sb.append("\n"); // signedProtocol -+ sb.append(signedVersion); -+ sb.append("\n"); -+ sb.append("\n"); //signed encryption scope -+ -+ String stringToSign = sb.toString(); -+ LOG.debug("Account SAS stringToSign: " + stringToSign.replace("\n", ".")); -+ return computeHmac256(stringToSign); -+ } -+ -+ /** -+ * By default Account SAS has all the available permissions. Use this to -+ * override the default permissions and set as per the requirements. -+ * @param permissions -+ */ -+ public void setPermissions(final String permissions) { -+ this.permissions = permissions; -+ } -+} -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java -index 2e9289d8d4..a80ddac5ed 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java -@@ -29,6 +29,10 @@ import javax.crypto.spec.SecretKeySpec; - - import org.slf4j.Logger; - import org.slf4j.LoggerFactory; -+ -+import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; -+ - /** - * Test SAS generator. 
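`AccountSASGenerator` above assembles a newline-delimited string-to-sign and signs it with HMAC-SHA256 keyed by the Base64-decoded storage account key, via `computeHmac256` in the shared `SASGenerator` base class. The following is a minimal, self-contained sketch of that signing step for illustration only — it uses `java.util.Base64` instead of Hadoop's `Base64` helper, and the sample field values (account name, permissions, start/expiry) are made up.

```java
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;

/** Minimal sketch: HMAC-SHA256 signing of an account-SAS string-to-sign (illustrative only). */
public final class SasSigningSketch {

  /** accountKeyBase64 is the storage account key as issued by Azure (Base64-encoded). */
  static String computeHmac256(String accountKeyBase64, String stringToSign) throws Exception {
    byte[] key = Base64.getDecoder().decode(accountKeyBase64);
    Mac hmac = Mac.getInstance("HmacSHA256");
    hmac.init(new SecretKeySpec(key, "HmacSHA256"));
    byte[] signature = hmac.doFinal(stringToSign.getBytes(StandardCharsets.UTF_8));
    return Base64.getEncoder().encodeToString(signature);
  }

  public static void main(String[] args) throws Exception {
    // Field order mirrors the account-SAS string-to-sign assembled above;
    // empty entries stand in for the unset signedIP / signedProtocol / encryption-scope fields.
    String stringToSign = String.join("\n",
        "myaccount", "r", "bf", "sco",
        "2024-01-01T00:00:00Z", "2024-01-02T00:00:00Z",
        "", "", "2021-06-08", "", "");
    String fakeKey = Base64.getEncoder()
        .encodeToString("not-a-real-account-key".getBytes(StandardCharsets.UTF_8));
    System.out.println("sig=" + computeHmac256(fakeKey, stringToSign));
  }
}
```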
- */ -@@ -54,10 +58,8 @@ public abstract class SASGenerator { - protected static final Logger LOG = LoggerFactory.getLogger(SASGenerator.class); - public static final Duration FIVE_MINUTES = Duration.ofMinutes(5); - public static final Duration ONE_DAY = Duration.ofDays(1); -- public static final DateTimeFormatter ISO_8601_FORMATTER = -- DateTimeFormatter -- .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT) -- .withZone(ZoneId.of("UTC")); -+ public static final DateTimeFormatter ISO_8601_FORMATTER = DateTimeFormatter -+ .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT).withZone(ZoneId.of("UTC")); - - private Mac hmacSha256; - private byte[] key; -@@ -68,7 +70,7 @@ public abstract class SASGenerator { - - /** - * Called by subclasses to initialize the cryptographic SHA-256 HMAC provider. -- * @param key - a 256-bit secret key -+ * @param key - a 256-bit secret key. - */ - protected SASGenerator(byte[] key) { - this.key = key; -@@ -85,6 +87,26 @@ public abstract class SASGenerator { - } - } - -+ protected String getCanonicalAccountName(String accountName) throws -+ InvalidConfigurationValueException { -+ // returns the account name without the endpoint -+ // given account names with endpoint have the format accountname.endpoint -+ // For example, input of xyz.dfs.core.windows.net should return "xyz" only -+ int dotIndex = accountName.indexOf(AbfsHttpConstants.DOT); -+ if (dotIndex == 0) { -+ // case when accountname starts with a ".": endpoint is present, accountName is null -+ // for example .dfs.azure.com, which is invalid -+ throw new InvalidConfigurationValueException("Account Name is not fully qualified"); -+ } -+ if (dotIndex > 0) { -+ // case when endpoint is present with accountName -+ return accountName.substring(0, dotIndex); -+ } else { -+ // case when accountName is already canonicalized -+ return accountName; -+ } -+ } -+ - protected String computeHmac256(final String stringToSign) { - byte[] utf8Bytes; - try { -@@ -98,4 +120,4 @@ public abstract class SASGenerator { - } - return Base64.encode(hmac); - } --} -\ No newline at end of file -+} -diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java -index 24a1cea255..0ae5239e8f 100644 ---- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java -+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java -@@ -20,23 +20,26 @@ package org.apache.hadoop.fs.azurebfs.utils; - - import java.time.Instant; - -+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; - import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; - - /** -- * Test Service SAS generator. -+ * Test Service SAS Generator. - */ - public class ServiceSASGenerator extends SASGenerator { - - /** -- * Creates a SAS Generator for Service SAS -+ * Creates a SAS Generator for Service SAS. - * (https://docs.microsoft.com/en-us/rest/api/storageservices/create-service-sas). -- * @param accountKey - the storage account key -+ * @param accountKey - the storage account key. 
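The `getCanonicalAccountName` helper added above keeps only the portion of the account name before the first dot (so `xyz.dfs.core.windows.net` becomes `xyz`) and rejects names that begin with a dot. A stand-alone sketch of that rule — using a plain `IllegalArgumentException` in place of Hadoop's `InvalidConfigurationValueException` — could look like this:

```java
/** Minimal sketch of the account-name canonicalization rule shown above (illustrative only). */
public final class CanonicalAccountName {

  static String canonicalize(String accountName) {
    int dotIndex = accountName.indexOf('.');
    if (dotIndex == 0) {
      // e.g. ".dfs.core.windows.net": an endpoint is present but the account name is missing.
      throw new IllegalArgumentException("Account Name is not fully qualified");
    }
    // Keep only the account part; a name without a dot is already canonical.
    return dotIndex > 0 ? accountName.substring(0, dotIndex) : accountName;
  }

  public static void main(String[] args) {
    System.out.println(canonicalize("xyz.dfs.core.windows.net")); // xyz
    System.out.println(canonicalize("xyz"));                      // xyz
  }
}
```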
- */ - public ServiceSASGenerator(byte[] accountKey) { - super(accountKey); - } - -- public String getContainerSASWithFullControl(String accountName, String containerName) { -+ public String getContainerSASWithFullControl(String accountName, String containerName) throws -+ InvalidConfigurationValueException { -+ accountName = getCanonicalAccountName(accountName); - String sp = "rcwdl"; - String sv = AuthenticationVersion.Feb20.toString(); - String sr = "c"; -@@ -66,7 +69,7 @@ public class ServiceSASGenerator extends SASGenerator { - sb.append("\n"); - sb.append(se); - sb.append("\n"); -- // canonicalized resource -+ // canonicalize resource - sb.append("/blob/"); - sb.append(accountName); - sb.append("/"); -@@ -93,4 +96,4 @@ public class ServiceSASGenerator extends SASGenerator { - LOG.debug("Service SAS stringToSign: " + stringToSign.replace("\n", ".")); - return computeHmac256(stringToSign); - } --} -\ No newline at end of file -+} diff --git a/hadoop/stackable/patches/3.4.0/patchable.toml b/hadoop/stackable/patches/3.4.0/patchable.toml deleted file mode 100644 index 038c64315..000000000 --- a/hadoop/stackable/patches/3.4.0/patchable.toml +++ /dev/null @@ -1,2 +0,0 @@ -base = "bd8b77f398f626bb7791783192ee7a5dfaeec760" -mirror = "https://github.com/stackabletech/hadoop.git" diff --git a/hadoop/versions.py b/hadoop/versions.py index 94f7a33dd..3a45287d1 100644 --- a/hadoop/versions.py +++ b/hadoop/versions.py @@ -1,14 +1,6 @@ versions = [ { - "product": "3.3.4", - "java-base": "11", - "java-devel": "11", - "async_profiler": "2.9", - "jmx_exporter": "1.2.0", - "protobuf": "3.7.1", - "hdfs_utils": "0.4.0", - }, - { + # Not part of SDP 25.7.0, but still required for hbase, hive, spark-k8s "product": "3.3.6", "java-base": "11", "java-devel": "11", @@ -17,15 +9,6 @@ "protobuf": "3.7.1", "hdfs_utils": "0.4.0", }, - { - "product": "3.4.0", - "java-base": "11", - "java-devel": "11", - "async_profiler": "2.9", - "jmx_exporter": "1.2.0", - "protobuf": "3.7.1", - "hdfs_utils": "0.4.0", - }, { "product": "3.4.1", "java-base": "11",