commit 8cf62974e8ed5cb82d3a54a7bfc3d68ef2458a14 Author: Vihang Karajgaonkar Date: Thu Feb 23 10:26:38 2017 -0800 HIVE-16014 : HiveMetastoreChecker should use hive.metastore.fshandler.threads instead of hive.mv.files.thread for pool size diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java index 7c94c95f00492467ba27dedc9ce513e13c85ea61..0f6d056e66dd9ca3d3d8ac41e07456168353add7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java @@ -407,8 +407,11 @@ private void checkPartitionDirs(Path basePath, Set allDirs, int maxDepth) basePaths.add(basePath); Set dirSet = Collections.newSetFromMap(new ConcurrentHashMap()); // Here we just reuse the THREAD_COUNT configuration for - // HIVE_MOVE_FILES_THREAD_COUNT - int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 15); + // METASTORE_FS_HANDLER_THREADS_COUNT since this results in better performance. + // The missing partitions discovered here are later added by the metastore using a + // threadpool of size METASTORE_FS_HANDLER_THREADS_COUNT. If we have a different-sized + // pool here, the smaller of the two pools becomes a bottleneck. + int poolSize = conf.getInt(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT.varname, 15); // Check if too low config is provided for move files. 2x CPU is reasonable max count. poolSize = poolSize == 0 ? poolSize : Math.max(poolSize,