From a591b6cde06c9abc9f9832b84b0839d81483b4be Mon Sep 17 00:00:00 2001
From: VicoWu <583424568@qq.com>
Date: Sat, 23 May 2020 10:28:39 +0800
Subject: [PATCH 1/4] Avoid meaningless retry attempts on unrecoverable failures

---
 .../hadoop/hbase/tool/BulkLoadHFilesTool.java | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
index e8b701b7ae3..ff111a86654 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
@@ -868,6 +868,8 @@ public void bulkHFile(ColumnFamilyDescriptorBuilder builder, FileStatus hfileSta
     Pair<Multimap<ByteBuffer, LoadQueueItem>, Set<String>> pair = null;
     Map<LoadQueueItem, ByteBuffer> item2RegionMap = new HashMap<>();
 
+    int previousRegionNum = 0;
+
     // Assumes that region splits can happen while this occurs.
     while (!queue.isEmpty()) {
       // need to reload split keys each iteration.
@@ -879,13 +881,21 @@ public void bulkHFile(ColumnFamilyDescriptorBuilder builder, FileStatus hfileSta
       }
 
       int maxRetries = getConf().getInt(HConstants.BULKLOAD_MAX_RETRIES_NUMBER, 10);
-      maxRetries = Math.max(maxRetries, startEndKeys.size() + 1);
+
+      /**
+       * On the first attempt, use the configured maximum retry number. Whenever the region
+       * count has changed since the previous attempt, raise maxRetries to the region count,
+       * because a concurrent split means another attempt may still succeed. If the region
+       * count is unchanged, keep BULKLOAD_MAX_RETRIES_NUMBER to avoid meaningless retries.
+       */
+      if (count != 0 && previousRegionNum != startEndKeys.size())
+        maxRetries = Math.max(maxRetries, startEndKeys.size() + 1);
       if (maxRetries != 0 && count >= maxRetries) {
         throw new IOException(
           "Retry attempted " + count + " times without completing, bailing out");
       }
       count++;
-
+      previousRegionNum = startEndKeys.size();
       // Using ByteBuffer for byte[] equality semantics
       pair = groupOrSplitPhase(conn, tableName, pool, queue, startEndKeys);
       Multimap<ByteBuffer, LoadQueueItem> regionGroups = pair.getFirst();
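
For readers, the policy above can be studied in isolation: the retry cap is only
raised past the configured maximum while the region count keeps changing. The
following is a minimal, self-contained Java sketch of that policy; the names
RetryCapSketch, regionCount and attempt are illustrative stand-ins, not HBase API.

import java.io.IOException;
import java.util.function.IntPredicate;
import java.util.function.IntSupplier;

// Sketch of the retry policy in the patch above: the cap is raised past the
// configured maximum only while the region count keeps changing, i.e. while a
// concurrent split means another attempt can still make progress.
public class RetryCapSketch {

  // attempt.test(n) performs bulk-load attempt n and returns true when done.
  public static void runWithRetries(int configuredMaxRetries, IntSupplier regionCount,
      IntPredicate attempt) throws IOException {
    int count = 0;
    int previousRegionNum = 0;
    while (true) {
      int regions = regionCount.getAsInt(); // reloaded each iteration, like the split keys
      int maxRetries = configuredMaxRetries;
      // Raise the cap only when the region count changed since the last attempt.
      if (count != 0 && previousRegionNum != regions) {
        maxRetries = Math.max(maxRetries, regions + 1);
      }
      if (maxRetries != 0 && count >= maxRetries) {
        throw new IOException("Retry attempted " + count + " times without completing");
      }
      count++;
      previousRegionNum = regions;
      if (attempt.test(count)) {
        return;
      }
    }
  }

  public static void main(String[] args) {
    try {
      // A permanently failing load against a static table (constant region
      // count) now bails out after the configured 3 attempts, not after
      // regionCount + 1 attempts as before.
      runWithRetries(3, () -> 100, n -> false);
    } catch (IOException e) {
      System.out.println(e.getMessage());
    }
  }
}
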
From 1f2d700be683c7c801eaa901669f89af89576f63 Mon Sep 17 00:00:00 2001
From: VicoWu <583424568@qq.com>
Date: Wed, 27 May 2020 10:03:08 +0800
Subject: [PATCH 2/4] Revert the client-side change and move the fix to the
 server side

---
 .../regionserver/SecureBulkLoadManager.java   | 31 +++++++++++--------
 .../hadoop/hbase/tool/BulkLoadHFilesTool.java | 12 +------
 2 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
index 15d87101a8f..bedb18f348e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
@@ -382,22 +382,27 @@ public String prepareBulkLoad(final byte[] family, final String srcPath, boolean
         throw new IOException("Path does not reference a file: " + p);
       }
 
-      // Check to see if the source and target filesystems are the same
-      if (!FSUtils.isSameHdfs(conf, srcFs, fs)) {
-        LOG.debug("Bulk-load file " + srcPath + " is on different filesystem than " +
+      try {
+        // Check to see if the source and target filesystems are the same
+        if (!FSUtils.isSameHdfs(conf, srcFs, fs)) {
+          LOG.debug("Bulk-load file " + srcPath + " is on different filesystem than " +
             "the destination filesystem. Copying file over to destination staging dir.");
-        FileUtil.copy(srcFs, p, fs, stageP, false, conf);
-      } else if (copyFile) {
-        LOG.debug("Bulk-load file " + srcPath + " is copied to destination staging dir.");
-        FileUtil.copy(srcFs, p, fs, stageP, false, conf);
-      } else {
-        LOG.debug("Moving " + p + " to " + stageP);
-        FileStatus origFileStatus = fs.getFileStatus(p);
-        origPermissions.put(srcPath, origFileStatus.getPermission());
-        if(!fs.rename(p, stageP)) {
-          throw new IOException("Failed to move HFile: " + p + " to " + stageP);
+          FileUtil.copy(srcFs, p, fs, stageP, false, conf);
+        } else if (copyFile) {
+          LOG.debug("Bulk-load file " + srcPath + " is copied to destination staging dir.");
+          FileUtil.copy(srcFs, p, fs, stageP, false, conf);
+        } else {
+          LOG.debug("Moving " + p + " to " + stageP);
+          FileStatus origFileStatus = fs.getFileStatus(p);
+          origPermissions.put(srcPath, origFileStatus.getPermission());
+          if (!fs.rename(p, stageP)) {
+            throw new IOException("Failed to move HFile: " + p + " to " + stageP);
+          }
         }
+      } catch (Throwable t) {
+        throw new IOException("Failed to prepare bulk load for path " + srcPath, t);
       }
+
       fs.setPermission(stageP, PERM_ALL_ACCESS);
       return stageP.toString();
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
index ff111a86654..e4677030404 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
@@ -868,8 +868,6 @@ public void bulkHFile(ColumnFamilyDescriptorBuilder builder, FileStatus hfileSta
     Pair<Multimap<ByteBuffer, LoadQueueItem>, Set<String>> pair = null;
     Map<LoadQueueItem, ByteBuffer> item2RegionMap = new HashMap<>();
 
-    int previousRegionNum = 0;
-
     // Assumes that region splits can happen while this occurs.
     while (!queue.isEmpty()) {
       // need to reload split keys each iteration.
@@ -882,20 +880,12 @@ public void bulkHFile(ColumnFamilyDescriptorBuilder builder, FileStatus hfileSta
 
       int maxRetries = getConf().getInt(HConstants.BULKLOAD_MAX_RETRIES_NUMBER, 10);
 
-      /**
-       * On the first attempt, use the configured maximum retry number. Whenever the region
-       * count has changed since the previous attempt, raise maxRetries to the region count,
-       * because a concurrent split means another attempt may still succeed. If the region
-       * count is unchanged, keep BULKLOAD_MAX_RETRIES_NUMBER to avoid meaningless retries.
-       */
-      if (count != 0 && previousRegionNum != startEndKeys.size())
-        maxRetries = Math.max(maxRetries, startEndKeys.size() + 1);
+      maxRetries = Math.max(maxRetries, startEndKeys.size() + 1);
       if (maxRetries != 0 && count >= maxRetries) {
         throw new IOException(
           "Retry attempted " + count + " times without completing, bailing out");
       }
       count++;
-      previousRegionNum = startEndKeys.size();
       // Using ByteBuffer for byte[] equality semantics
       pair = groupOrSplitPhase(conn, tableName, pool, queue, startEndKeys);
       Multimap<ByteBuffer, LoadQueueItem> regionGroups = pair.getFirst();
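
The server-side half of this patch changes prepareBulkLoad so that any failure
during the copy-or-move staging step surfaces as a single IOException naming the
source path. A self-contained sketch of that shape, using local-filesystem
stand-ins (StagingSketch, stageFile) rather than the real HDFS FileSystem API:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

// Sketch of the server-side shape the patch gives prepareBulkLoad: the whole
// copy-or-move staging step is wrapped so that any failure, checked or
// unchecked, reaches the caller as one IOException naming the offending path.
final class StagingSketch {

  static Path stageFile(Path src, Path stagingDir, boolean copyFile) throws IOException {
    Path stageP = stagingDir.resolve(src.getFileName());
    try {
      if (copyFile) {
        Files.copy(src, stageP);
      } else {
        Files.move(src, stageP);
      }
    } catch (Throwable t) {
      // Wrap everything, including RuntimeExceptions, so the client sees a
      // single actionable error instead of a half-completed staging step.
      throw new IOException("Failed to prepare bulk load for path " + src, t);
    }
    return stageP;
  }
}
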
From 97f825c5c74eb5723d7daa7e27b774ed438b618d Mon Sep 17 00:00:00 2001
From: VicoWu <583424568@qq.com>
Date: Wed, 27 May 2020 10:47:35 +0800
Subject: [PATCH 3/4] Throw IOException instead of swallowing it

---
 .../hbase/regionserver/SecureBulkLoadManager.java | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
index bedb18f348e..6074a513267 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.math.BigInteger;
 import java.security.PrivilegedAction;
+import java.security.PrivilegedExceptionAction;
 import java.security.SecureRandom;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -264,10 +265,9 @@ private boolean isUserReferenced(UserGroupInformation ugi) {
         ugi.addToken(targetFsToken);
       }
     }
-
-    map = ugi.doAs(new PrivilegedAction<Map<byte[], List<Path>>>() {
+    map = ugi.doAs(new PrivilegedExceptionAction<Map<byte[], List<Path>>>() {
       @Override
-      public Map<byte[], List<Path>> run() {
+      public Map<byte[], List<Path>> run() throws IOException {
         FileSystem fs = null;
         try {
           /*
@@ -291,10 +291,10 @@ private boolean isUserReferenced(UserGroupInformation ugi) {
           //We call bulkLoadHFiles as requesting user
           //To enable access prior to staging
           return region.bulkLoadHFiles(familyPaths, true,
-            new SecureBulkLoadListener(fs, bulkToken, conf), request.getCopyFile(),
+            new SecureBulkLoadListener(fs, bulkToken, conf), request.getCopyFile(),
             clusterIds, request.getReplicate());
         } catch (Exception e) {
-          LOG.error("Failed to complete bulk load", e);
+          throw new IOException(e);
         }
         return null;
       }
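
The type switch above is the whole point of this patch: PrivilegedAction.run()
cannot throw checked exceptions, which is what forced the old catch-and-log,
while PrivilegedExceptionAction.run() can, so the caller receives the cause. The
JDK-only sketch below demonstrates the difference with AccessController; Hadoop's
UserGroupInformation.doAs follows the same two-overload pattern.

import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;

// Contrast the two action types: with PrivilegedAction a checked failure can
// only be swallowed inside the action; with PrivilegedExceptionAction it
// propagates to the doPrivileged/doAs caller.
public class PrivilegedSketch {

  public static void main(String[] args) {
    // With PrivilegedAction the failure can only be swallowed (the old behavior).
    String swallowed = AccessController.doPrivileged((PrivilegedAction<String>) () -> {
      try {
        return load();
      } catch (IOException e) {
        return null; // the error is lost here -- exactly what the patch removes
      }
    });
    System.out.println("PrivilegedAction result: " + swallowed);

    // With PrivilegedExceptionAction the failure reaches the caller as the
    // cause of a PrivilegedActionException.
    try {
      String loaded = AccessController.doPrivileged(
        (PrivilegedExceptionAction<String>) PrivilegedSketch::load);
      System.out.println("PrivilegedExceptionAction result: " + loaded);
    } catch (PrivilegedActionException e) {
      System.err.println("bulk load failed: " + e.getCause());
    }
  }

  private static String load() throws IOException {
    throw new IOException("simulated bulk load failure");
  }
}
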
From bcb9620190b40d3099ce980c4e4b1ba2ce5d1e3b Mon Sep 17 00:00:00 2001
From: VicoWu <583424568@qq.com>
Date: Wed, 27 May 2020 10:51:38 +0800
Subject: [PATCH 4/4] Clean up unused code

---
 .../java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
index e4677030404..e8b701b7ae3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
@@ -879,13 +879,13 @@ public void bulkHFile(ColumnFamilyDescriptorBuilder builder, FileStatus hfileSta
       }
 
       int maxRetries = getConf().getInt(HConstants.BULKLOAD_MAX_RETRIES_NUMBER, 10);
-
       maxRetries = Math.max(maxRetries, startEndKeys.size() + 1);
       if (maxRetries != 0 && count >= maxRetries) {
         throw new IOException(
           "Retry attempted " + count + " times without completing, bailing out");
       }
       count++;
+
       // Using ByteBuffer for byte[] equality semantics
       pair = groupOrSplitPhase(conn, tableName, pool, queue, startEndKeys);
       Multimap<ByteBuffer, LoadQueueItem> regionGroups = pair.getFirst();
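
Net effect of the series: the index hashes show BulkLoadHFilesTool ends at the
blob it started from (e8b701b7ae3), so the client retry loop is unchanged, and
termination on unrecoverable failures now comes from the server throwing instead
of logging (patches 2 and 3). The sketch below illustrates that division of
labor; SeriesSketch, Server and clientLoop are hypothetical, not HBase code.

import java.io.IOException;

// End-to-end shape after the series: the client loop is unchanged, but a
// server-side failure is now visible per attempt instead of looking like
// silent lack of progress.
public class SeriesSketch {

  interface Server {
    void bulkLoad() throws IOException;
  }

  static void clientLoop(Server server, int maxRetries) throws IOException {
    int count = 0;
    while (true) {
      if (maxRetries != 0 && count >= maxRetries) {
        throw new IOException("Retry attempted " + count + " times without completing");
      }
      count++;
      try {
        server.bulkLoad();
        return; // done
      } catch (IOException e) {
        // Before patch 3 the server logged and returned null, so the client
        // only saw "no progress"; now each failure is visible here.
        System.err.println("attempt " + count + " failed: " + e.getMessage());
      }
    }
  }

  public static void main(String[] args) {
    try {
      clientLoop(() -> { throw new IOException("permanent failure"); }, 3);
    } catch (IOException e) {
      System.out.println("bailed out: " + e.getMessage());
    }
  }
}
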