From a24eb390bf928bbb0a2325da006dd7604d45332a Mon Sep 17 00:00:00 2001 From: JeongMin Ju Date: Wed, 8 Apr 2026 10:26:14 +0900 Subject: [PATCH 1/2] HBASE-30058 Eliminate unnecessary connection creation in snapshot operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each snapshot operation created two short-lived connections in SnapshotDescriptionUtils.validate() — one for isSecurityAvailable() to check hbase:acl table existence, and another in writeAclToSnapshotDescription() to read ACL data. In Kerberos environments with ZKConnectionRegistry, each connection triggered a new ZK session with GSSAPI authentication and a TGS request to the KDC, causing excessive KDC load during batch snapshot operations. This patch reuses the caller's existing connection instead of creating new ones: - isSecurityAvailable() now accepts a Connection parameter - writeAclToSnapshotDescription() passes the shared connection's Table to PermissionStorage.getTablePermissions() - All callers (MasterRpcServices, RestoreSnapshotProcedure, CloneSnapshotProcedure) pass through their available connection The original validate(SnapshotDescription, Configuration) and isSecurityAvailable(Configuration) signatures are preserved as overloads that delegate to the new Connection-based methods, so existing callers (including tests) keep working unchanged. The new Connection-based overloads are documented as preferred for callers that already hold a Connection, to avoid the per-call connection setup cost. --- .../hbase/master/MasterRpcServices.java | 4 +- .../procedure/CloneSnapshotProcedure.java | 2 +- .../procedure/RestoreSnapshotProcedure.java | 2 +- .../security/access/PermissionStorage.java | 9 ++- .../snapshot/SnapshotDescriptionUtils.java | 58 ++++++++++++++++--- 5 files changed, 61 insertions(+), 14 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 85336eed2fb5..f4c6a23c39dc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -1792,8 +1792,8 @@ public SnapshotResponse snapshot(RpcController controller, SnapshotRequest reque LOG.info(server.getClientIdAuditPrefix() + " snapshot request for:" + ClientSnapshotDescriptionUtils.toString(request.getSnapshot())); // get the snapshot information - SnapshotDescription snapshot = - SnapshotDescriptionUtils.validate(request.getSnapshot(), server.getConfiguration()); + SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(server.getConnection(), + request.getSnapshot(), server.getConfiguration()); // send back the max amount of time the client should wait for the snapshot to complete long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(server.getConfiguration(), snapshot.getType(), SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java index 19f5d9db41d5..3799dd1daf00 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java @@ -133,7 +133,7 @@ private void restoreSnapshotAcl(MasterProcedureEnv env) throws IOException { Configuration conf = env.getMasterServices().getConfiguration(); if ( restoreAcl && snapshot.hasUsersAndPermissions() && snapshot.getUsersAndPermissions() != null - && SnapshotDescriptionUtils.isSecurityAvailable(conf) + && SnapshotDescriptionUtils.isSecurityAvailable(env.getMasterServices().getConnection()) ) { RestoreSnapshotHelper.restoreSnapshotAcl(snapshot, tableDescriptor.getTableName(), conf); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java index e16b33741065..bb7a5582f4db 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java @@ -556,7 +556,7 @@ private void addRegionsToInMemoryStates(List regionInfos, MasterProc private void restoreSnapshotAcl(final MasterProcedureEnv env) throws IOException { if ( restoreAcl && snapshot.hasUsersAndPermissions() && snapshot.getUsersAndPermissions() != null - && SnapshotDescriptionUtils.isSecurityAvailable(env.getMasterServices().getConfiguration()) + && SnapshotDescriptionUtils.isSecurityAvailable(env.getMasterServices().getConnection()) ) { // restore acl of snapshot to table. RestoreSnapshotHelper.restoreSnapshotAcl(snapshot, TableName.valueOf(snapshot.getTable()), diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/PermissionStorage.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/PermissionStorage.java index b66c0ed0b099..5fa1e114aad0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/PermissionStorage.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/PermissionStorage.java @@ -482,8 +482,13 @@ static Map> loadAll(Configuration c public static ListMultimap getTablePermissions(Configuration conf, TableName tableName) throws IOException { - return getPermissions(conf, tableName != null ? tableName.getName() : null, null, null, null, - null, false); + return getTablePermissions(conf, tableName, null); + } + + public static ListMultimap getTablePermissions(Configuration conf, + TableName tableName, Table t) throws IOException { + return getPermissions(conf, tableName != null ? tableName.getName() : null, t, null, null, null, + false); } public static ListMultimap getNamespacePermissions(Configuration conf, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java index 689cd89259ba..64249363337a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.security.access.AccessChecker; @@ -302,6 +303,35 @@ private static Path getDefaultWorkingSnapshotDir(final Path rootDir) { */ public static SnapshotDescription validate(SnapshotDescription snapshot, Configuration conf) throws IllegalArgumentException, IOException { + // Fail fast on the basic invariant before paying the cost of creating a Connection (which + // involves a ZK session and, in Kerberos environments, a TGS request to the KDC). This also + // preserves the prior behavior of this overload, which callers (including unit tests with + // stub Configurations) rely on to throw without performing any I/O. + if (!snapshot.hasTable()) { + throw new IllegalArgumentException( + "Descriptor doesn't apply to a table, so we can't build it."); + } + try (Connection conn = ConnectionFactory.createConnection(conf)) { + return validate(conn, snapshot, conf); + } + } + + /** + * Convert the passed snapshot description into a 'full' snapshot description based on default + * parameters, if none have been supplied. This resolves any 'optional' parameters that aren't + * supplied to their default values. Prefer this overload over + * {@link #validate(SnapshotDescription, Configuration)} when the caller already holds a + * {@link Connection}, to avoid the cost of creating a fresh connection (which involves a ZK + * session and, in Kerberos environments, a TGS request to the KDC) for each call. + * @param conn connection to use for reading ACL information when security is enabled + * @param snapshot general snapshot descriptor + * @param conf Configuration to read configured snapshot defaults if snapshot is not complete + * @return a valid snapshot description + * @throws IllegalArgumentException if the {@link SnapshotDescription} is not a complete + * {@link SnapshotDescription}. + */ + public static SnapshotDescription validate(Connection conn, SnapshotDescription snapshot, + Configuration conf) throws IllegalArgumentException, IOException { if (!snapshot.hasTable()) { throw new IllegalArgumentException( "Descriptor doesn't apply to a table, so we can't build it."); @@ -350,8 +380,8 @@ public static SnapshotDescription validate(SnapshotDescription snapshot, Configu snapshot = builder.build(); // set the acl to snapshot if security feature is enabled. - if (isSecurityAvailable(conf)) { - snapshot = writeAclToSnapshotDescription(snapshot, conf); + if (isSecurityAvailable(conn)) { + snapshot = writeAclToSnapshotDescription(conn, snapshot, conf); } return snapshot; } @@ -475,20 +505,32 @@ public static boolean isSnapshotOwner(org.apache.hadoop.hbase.client.SnapshotDes } public static boolean isSecurityAvailable(Configuration conf) throws IOException { - try (Connection conn = ConnectionFactory.createConnection(conf); - Admin admin = conn.getAdmin()) { + try (Connection conn = ConnectionFactory.createConnection(conf)) { + return isSecurityAvailable(conn); + } + } + + /** + * Prefer this overload over {@link #isSecurityAvailable(Configuration)} when the caller already + * holds a {@link Connection}, to avoid the cost of creating a fresh connection (which involves a + * ZK session and, in Kerberos environments, a TGS request to the KDC) for each call. + */ + public static boolean isSecurityAvailable(Connection conn) throws IOException { + try (Admin admin = conn.getAdmin()) { return admin.tableExists(PermissionStorage.ACL_TABLE_NAME); } } - private static SnapshotDescription writeAclToSnapshotDescription(SnapshotDescription snapshot, - Configuration conf) throws IOException { + private static SnapshotDescription writeAclToSnapshotDescription(Connection conn, + SnapshotDescription snapshot, Configuration conf) throws IOException { ListMultimap perms = User.runAsLoginUser(new PrivilegedExceptionAction>() { @Override public ListMultimap run() throws Exception { - return PermissionStorage.getTablePermissions(conf, - TableName.valueOf(snapshot.getTable())); + try (Table aclTable = conn.getTable(PermissionStorage.ACL_TABLE_NAME)) { + return PermissionStorage.getTablePermissions(conf, + TableName.valueOf(snapshot.getTable()), aclTable); + } } }); return snapshot.toBuilder() From cc167d929532a4b2511f0a0e5db4e378e2c3b0ad Mon Sep 17 00:00:00 2001 From: juke-mini666 Date: Sun, 10 May 2026 09:30:00 +0900 Subject: [PATCH 2/2] Address review feedback - Extract the hasTable() check into a private helper to avoid duplication across the 2-arg and 3-arg validate() overloads - Drop the over-explaining comment in the 2-arg wrapper --- .../snapshot/SnapshotDescriptionUtils.java | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java index 64249363337a..7269ba65b4d6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java @@ -303,17 +303,17 @@ private static Path getDefaultWorkingSnapshotDir(final Path rootDir) { */ public static SnapshotDescription validate(SnapshotDescription snapshot, Configuration conf) throws IllegalArgumentException, IOException { - // Fail fast on the basic invariant before paying the cost of creating a Connection (which - // involves a ZK session and, in Kerberos environments, a TGS request to the KDC). This also - // preserves the prior behavior of this overload, which callers (including unit tests with - // stub Configurations) rely on to throw without performing any I/O. + requireHasTable(snapshot); + try (Connection conn = ConnectionFactory.createConnection(conf)) { + return validate(conn, snapshot, conf); + } + } + + private static void requireHasTable(SnapshotDescription snapshot) { if (!snapshot.hasTable()) { throw new IllegalArgumentException( "Descriptor doesn't apply to a table, so we can't build it."); } - try (Connection conn = ConnectionFactory.createConnection(conf)) { - return validate(conn, snapshot, conf); - } } /** @@ -332,10 +332,7 @@ public static SnapshotDescription validate(SnapshotDescription snapshot, Configu */ public static SnapshotDescription validate(Connection conn, SnapshotDescription snapshot, Configuration conf) throws IllegalArgumentException, IOException { - if (!snapshot.hasTable()) { - throw new IllegalArgumentException( - "Descriptor doesn't apply to a table, so we can't build it."); - } + requireHasTable(snapshot); SnapshotDescription.Builder builder = snapshot.toBuilder();