commit 3fe9836f75bf9aa421a7a5f2a62b6d5a83e024bd Author: Vihang Karajgaonkar Date: Thu Mar 30 16:33:09 2017 -0700 HIVE-16347 : HiveMetastoreChecker should skip listing partitions which are not valid when hive.msck.path.validation is set to skip or ignore diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java index da24c7041fae72768794f3ba47b2d416d8cf8083..4add83600cadb39b38a76ec5450c3ef09e30a180 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java @@ -498,9 +498,10 @@ private Path processPathDepthInfo(final PathDepthInfo pd) } else if (!parts[0].equalsIgnoreCase(partColNames.get(currentDepth))) { logOrThrowExceptionWithMsg( "Unexpected partition key " + parts[0] + " found at " + nextPath); + } else { + // add sub-directory to the work queue if maxDepth is not yet reached + pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1)); } - // add sub-directory to the work queue if maxDepth is not yet reached - pendingPaths.add(new PathDepthInfo(nextPath, currentDepth + 1)); } } if (currentDepth == partColNames.size()) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java index d7fbbce2d8dbbd28b91978ba79639a2616b687bd..90e6781fa095bc4329140e895da81a765918233d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java @@ -235,6 +235,34 @@ public void testInvalidPartitionKeyName() throws HiveException, AlreadyExistsExc checker.checkMetastore(dbName, tableName, null, new CheckResult()); } + /* + * skip mode should not throw an exception when an invalid partition directory + * is found. 
It should just ignore it + */ + @Test + public void testSkipInvalidPartitionKeyName() + throws HiveException, AlreadyExistsException, IOException { + hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); + checker = new HiveMetaStoreChecker(hive); + Table table = createTestTable(); + List partitions = hive.getPartitions(table); + assertEquals(2, partitions.size()); + // add a fake partition dir on fs + fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); + Path fakePart = + new Path(table.getDataLocation().toString(), "fakedate=2009-01-01/fakecity=sanjose"); + fs.mkdirs(fakePart); + fs.deleteOnExit(fakePart); + createPartitionsDirectoriesOnFS(table, 2); + CheckResult result = new CheckResult(); + checker.checkMetastore(dbName, tableName, null, result); + assertEquals(Collections. emptySet(), result.getTablesNotInMs()); + assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + // only 2 valid partitions should be added + assertEquals(2, result.getPartitionsNotInMs().size()); + } + private Table createTestTable() throws AlreadyExistsException, HiveException { Database db = new Database(); db.setName(dbName); @@ -487,6 +515,30 @@ public void testInvalidOrderForPartitionKeysOnFS() CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); } + + /* + * In skip mode msck should ignore invalid partitions instead of + * throwing exception + */ + @Test + public void testSkipInvalidOrderForPartitionKeysOnFS() + throws AlreadyExistsException, HiveException, IOException { + hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); + checker = new HiveMetaStoreChecker(hive); + Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); + // add invalid partition directories on the filesystem + createInvalidPartitionDirsOnFS(testTable, 2); + // add 2 valid partitions on the filesystem + 
createPartitionsDirectoriesOnFS(testTable, 2); + CheckResult result = new CheckResult(); + checker.checkMetastore(dbName, tableName, null, result); + assertEquals(Collections. emptySet(), result.getTablesNotInMs()); + assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + // only 2 valid partitions should be added + assertEquals(2, result.getPartitionsNotInMs().size()); + } + /* * Test if single-threaded implementation checker throws HiveException when the there is a dummy * directory present in the nested level