diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java index a164b12..92c12ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java @@ -310,7 +310,7 @@ void findUnknownPartitions(Table table, Set partPaths, // now check the table folder and see if we find anything // that isn't in the metastore Set allPartDirs = new HashSet(); - getAllLeafDirs(tablePath, allPartDirs); + getLastPartitionDirs(tablePath, allPartDirs, table.getPartCols().size()); // don't want the table dir allPartDirs.remove(tablePath); @@ -367,12 +367,14 @@ private String getPartitionName(Path tablePath, Path partitionPath) { * Start directory * @param allDirs * This set will contain the leaf paths at the end. + * @param maxDepth + * Specifies how deep the search goes. + * @throws IOException * Thrown if we can't get lists from the fs. 
* @throws HiveException */ - private void getAllLeafDirs(Path basePath, Set allDirs) throws IOException, HiveException { + private void getLastPartitionDirs(Path basePath, Set allDirs, int maxDepth) throws IOException, HiveException { ConcurrentLinkedQueue basePaths = new ConcurrentLinkedQueue<>(); basePaths.add(basePath); // we only use the keySet of ConcurrentHashMap @@ -390,14 +392,14 @@ private void getAllLeafDirs(Path basePath, Set allDirs) throws IOException LOG.debug("Using threaded version of MSCK-GetPaths with number of threads " + ((ThreadPoolExecutor) pool).getPoolSize()); } - getAllLeafDirs(pool, basePaths, dirSet, basePath.getFileSystem(conf)); + getLastPartitionDirs(pool, basePaths, dirSet, basePath.getFileSystem(conf), maxDepth); pool.shutdown(); allDirs.addAll(dirSet.keySet()); } // process the basePaths in parallel and then the next level of basePaths - private void getAllLeafDirs(final ExecutorService pool, final ConcurrentLinkedQueue basePaths, - final Map allDirs, final FileSystem fs) throws IOException, HiveException { + private void getLastPartitionDirs(final ExecutorService pool, final ConcurrentLinkedQueue basePaths, + final Map allDirs, final FileSystem fs, final int maxDepth) throws IOException, HiveException { final ConcurrentLinkedQueue nextLevel = new ConcurrentLinkedQueue<>(); if (null == pool) { for (final Path path : basePaths) { @@ -410,12 +412,12 @@ private void getAllLeafDirs(final ExecutorService pool, final ConcurrentLinkedQu } } - if (!directoryFound) { + if (!directoryFound && maxDepth == 0) { // true is just a boolean object place holder because neither the key nor the value can be null. 
allDirs.put(path, true); } - if (!nextLevel.isEmpty()) { - getAllLeafDirs(pool, nextLevel, allDirs, fs); + if (!nextLevel.isEmpty() && maxDepth != 0) { + getLastPartitionDirs(pool, nextLevel, allDirs, fs, maxDepth - 1); } } } else { @@ -434,7 +436,7 @@ public Void call() throws Exception { } } - if (!directoryFound) { + if (!directoryFound && maxDepth == 0) { allDirs.put(path, true); } return null; @@ -450,10 +452,9 @@ public Void call() throws Exception { throw new HiveException(e.getCause()); } } - if (!nextLevel.isEmpty()) { - getAllLeafDirs(pool, nextLevel, allDirs, fs); + if (!nextLevel.isEmpty() && maxDepth != 0) { + getLastPartitionDirs(pool, nextLevel, allDirs, fs, maxDepth - 1); } } } - } diff --git a/ql/src/test/queries/clientpositive/msck_repair_wrong_columns.q b/ql/src/test/queries/clientpositive/msck_repair_wrong_columns.q new file mode 100644 index 0000000..ea596cb --- /dev/null +++ b/ql/src/test/queries/clientpositive/msck_repair_wrong_columns.q @@ -0,0 +1,18 @@ +set hive.msck.repair.batch.size=1; + +DROP TABLE IF EXISTS repairtable; + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING); + +MSCK TABLE repairtable; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=c/p2=a/p3=b; +dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=c/p2=a/p3=b/datafile; + +MSCK TABLE default.repairtable; + +MSCK REPAIR TABLE default.repairtable; + +MSCK TABLE repairtable; + +DROP TABLE default.repairtable; diff --git a/ql/src/test/results/clientpositive/msck_repair_wrong_columns.q.out b/ql/src/test/results/clientpositive/msck_repair_wrong_columns.q.out new file mode 100644 index 0000000..8ef23fb --- /dev/null +++ b/ql/src/test/results/clientpositive/msck_repair_wrong_columns.q.out @@ -0,0 +1,36 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY 
(p1 STRING, p2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable +POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK +PREHOOK: query: MSCK TABLE default.repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE default.repairtable +POSTHOOK: type: MSCK +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK +PREHOOK: query: DROP TABLE default.repairtable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable +PREHOOK: Output: default@repairtable +POSTHOOK: query: DROP TABLE default.repairtable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable +POSTHOOK: Output: default@repairtable