diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java index 7821f40a82..d592dbfb07 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java @@ -137,6 +137,7 @@ private void cleanUpTableQuietly(String catName, String dbName, String tableName for (int i = 0; i < numOfParts; i++) { PartitionResult result = new PartitionResult(); result.setPartitionName("city=dummyCity_" + String.valueOf(i)); + result.setTableName("dummyTable"); partsNotInMs.add(result); } return partsNotInMs; diff --git ql/src/test/queries/clientnegative/msck_repair_5.q ql/src/test/queries/clientnegative/msck_repair_5.q new file mode 100644 index 0000000000..fe1eab75e6 --- /dev/null +++ ql/src/test/queries/clientnegative/msck_repair_5.q @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS repairtable; + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING); + +MSCK REPAIR TABLE default.repairtable; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=A; + +MSCK REPAIR TABLE default.repairtable; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=a; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/P1=a; + +MSCK REPAIR TABLE default.repairtable; + +DROP TABLE default.repairtable; diff --git ql/src/test/queries/clientnegative/msck_repair_6.q ql/src/test/queries/clientnegative/msck_repair_6.q new file mode 100644 index 0000000000..76e85571aa --- /dev/null +++ ql/src/test/queries/clientnegative/msck_repair_6.q @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS repairtable; + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING); + +MSCK REPAIR TABLE default.repairtable; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=a; + +MSCK REPAIR TABLE default.repairtable; + +dfs 
${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/P1=a; + +MSCK REPAIR TABLE default.repairtable; + +DROP TABLE default.repairtable; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/msck_repair_4.q ql/src/test/queries/clientpositive/msck_repair_4.q new file mode 100644 index 0000000000..aaf10a454d --- /dev/null +++ ql/src/test/queries/clientpositive/msck_repair_4.q @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS repairtable_n4; + +CREATE EXTERNAL TABLE repairtable_n4(key INT, value STRING) PARTITIONED BY (Year INT, Month INT, Day INT) stored as ORC; + +MSCK TABLE repairtable_n4; +show partitions repairtable_n4; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n4/Year=2020/Month=3/Day=1; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n4/Year=2020/Month=3/Day=2; + +MSCK TABLE repairtable_n4; +show partitions repairtable_n4; + +DROP TABLE default.repairtable_n4; diff --git ql/src/test/queries/clientpositive/msck_repair_5.q ql/src/test/queries/clientpositive/msck_repair_5.q new file mode 100644 index 0000000000..280fe10b00 --- /dev/null +++ ql/src/test/queries/clientpositive/msck_repair_5.q @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS repairtable_n5; + +CREATE EXTERNAL TABLE repairtable_n5(key INT, value STRING) PARTITIONED BY (Country String) stored as ORC; + +MSCK TABLE repairtable_n5; +show partitions repairtable_n5; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n5/Country=US; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n5/Country=us; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n5/Country=India; + +MSCK TABLE repairtable_n5; +show partitions repairtable_n5; + +DROP TABLE default.repairtable_n5; diff --git ql/src/test/queries/clientpositive/msck_repair_6.q ql/src/test/queries/clientpositive/msck_repair_6.q new file mode 100644 index 0000000000..03fef3d6ff --- /dev/null +++ 
ql/src/test/queries/clientpositive/msck_repair_6.q @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS repairtable_n6; + +CREATE EXTERNAL TABLE repairtable_n6(key INT, value STRING) PARTITIONED BY (Year INT, Month INT, Day INT) stored as ORC location '${system:test.tmp.dir}/apps/hive/warehouse/test.db/Repairtable_n6/'; + +MSCK TABLE repairtable_n6; +show partitions repairtable_n6; + +dfs ${system:test.dfs.mkdir} -p ${system:test.tmp.dir}/apps/hive/warehouse/test.db/Repairtable_n6/Year=2020/Month=4/Day=1; + +MSCK TABLE repairtable_n6; +show partitions repairtable_n6; + +DROP TABLE default.repairtable_n6; diff --git ql/src/test/results/clientnegative/msck_repair_5.q.out ql/src/test/results/clientnegative/msck_repair_5.q.out new file mode 100644 index 0000000000..b06dde7410 --- /dev/null +++ ql/src/test/results/clientnegative/msck_repair_5.q.out @@ -0,0 +1,30 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable +POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=A +#### A masked pattern was here #### +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: 
MSCK +PREHOOK: Output: default@repairtable +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask diff --git ql/src/test/results/clientnegative/msck_repair_6.q.out ql/src/test/results/clientnegative/msck_repair_6.q.out new file mode 100644 index 0000000000..680b009220 --- /dev/null +++ ql/src/test/results/clientnegative/msck_repair_6.q.out @@ -0,0 +1,30 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable +POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable +Partitions not in metastore: repairtable:p1=a +#### A masked pattern was here #### +PREHOOK: query: MSCK REPAIR TABLE default.repairtable +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.ddl.DDLTask diff --git ql/src/test/results/clientpositive/msck_repair_4.q.out ql/src/test/results/clientpositive/msck_repair_4.q.out new file mode 100644 index 0000000000..2bbf38679d --- /dev/null +++ ql/src/test/results/clientpositive/msck_repair_4.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n4 +PREHOOK: type: DROPTABLE 
+POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE repairtable_n4(key INT, value STRING) PARTITIONED BY (Year INT, Month INT, Day INT) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n4 +POSTHOOK: query: CREATE EXTERNAL TABLE repairtable_n4(key INT, value STRING) PARTITIONED BY (Year INT, Month INT, Day INT) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n4 +PREHOOK: query: MSCK TABLE repairtable_n4 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n4 +POSTHOOK: query: MSCK TABLE repairtable_n4 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n4 +PREHOOK: query: show partitions repairtable_n4 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n4 +POSTHOOK: query: show partitions repairtable_n4 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n4 +PREHOOK: query: MSCK TABLE repairtable_n4 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n4 +POSTHOOK: query: MSCK TABLE repairtable_n4 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n4 +Partitions not in metastore: repairtable_n4:year=2020/month=3/day=1 repairtable_n4:year=2020/month=3/day=2 +PREHOOK: query: show partitions repairtable_n4 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n4 +POSTHOOK: query: show partitions repairtable_n4 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n4 +PREHOOK: query: DROP TABLE default.repairtable_n4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n4 +PREHOOK: Output: default@repairtable_n4 +POSTHOOK: query: DROP TABLE default.repairtable_n4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n4 +POSTHOOK: Output: default@repairtable_n4 diff --git ql/src/test/results/clientpositive/msck_repair_5.q.out 
ql/src/test/results/clientpositive/msck_repair_5.q.out new file mode 100644 index 0000000000..f0ff8158fe --- /dev/null +++ ql/src/test/results/clientpositive/msck_repair_5.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE repairtable_n5(key INT, value STRING) PARTITIONED BY (Country String) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: CREATE EXTERNAL TABLE repairtable_n5(key INT, value STRING) PARTITIONED BY (Country String) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n5 +PREHOOK: query: MSCK TABLE repairtable_n5 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: MSCK TABLE repairtable_n5 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n5 +PREHOOK: query: show partitions repairtable_n5 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n5 +POSTHOOK: query: show partitions repairtable_n5 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n5 +PREHOOK: query: MSCK TABLE repairtable_n5 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: MSCK TABLE repairtable_n5 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n5 +Partitions not in metastore: repairtable_n5:country=India repairtable_n5:country=US repairtable_n5:country=us +PREHOOK: query: show partitions repairtable_n5 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n5 +POSTHOOK: query: show partitions repairtable_n5 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n5 +PREHOOK: query: DROP TABLE default.repairtable_n5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n5 +PREHOOK: Output: default@repairtable_n5 
+POSTHOOK: query: DROP TABLE default.repairtable_n5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n5 +POSTHOOK: Output: default@repairtable_n5 diff --git ql/src/test/results/clientpositive/msck_repair_6.q.out ql/src/test/results/clientpositive/msck_repair_6.q.out new file mode 100644 index 0000000000..30a38ad961 --- /dev/null +++ ql/src/test/results/clientpositive/msck_repair_6.q.out @@ -0,0 +1,47 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n6 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n6 +POSTHOOK: type: DROPTABLE +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n6 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n6 +PREHOOK: query: MSCK TABLE repairtable_n6 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: MSCK TABLE repairtable_n6 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +PREHOOK: query: show partitions repairtable_n6 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n6 +POSTHOOK: query: show partitions repairtable_n6 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n6 +PREHOOK: query: MSCK TABLE repairtable_n6 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: MSCK TABLE repairtable_n6 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +Partitions not in metastore: repairtable_n6:year=2020/month=4/day=1 +PREHOOK: query: show partitions repairtable_n6 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n6 +POSTHOOK: query: show partitions repairtable_n6 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n6 +PREHOOK: query: DROP TABLE default.repairtable_n6 +PREHOOK: type: DROPTABLE 
+PREHOOK: Input: default@repairtable_n6 +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: DROP TABLE default.repairtable_n6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n6 +POSTHOOK: Output: default@repairtable_n6 diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java index 5287f47e21..4feeb6416b 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java @@ -17,6 +17,10 @@ */ package org.apache.hadoop.hive.metastore; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.MetaException; + +import java.util.Map; import java.util.Set; import java.util.TreeSet; @@ -31,6 +35,7 @@ private Set partitionsNotOnFs = new TreeSet(); private Set partitionsNotInMs = new TreeSet(); private Set expiredPartitions = new TreeSet<>(); + private Set correctPartitions = new TreeSet<>(); /** * @return a list of tables not found on the filesystem. @@ -101,6 +106,14 @@ public void setExpiredPartitions( this.expiredPartitions = expiredPartitions; } + public Set getCorrectPartitions() { + return this.correctPartitions; + } + + public void setCorrectPartitions(final Set correctPartitions) { + this.correctPartitions = correctPartitions; + } + /** * A basic description of a partition that is missing from either the fs or * the ms. 
@@ -108,6 +121,7 @@ public void setExpiredPartitions( public static class PartitionResult implements Comparable { private String partitionName; private String tableName; + private Path path; /** * @return name of partition @@ -139,11 +153,45 @@ public void setTableName(String tableName) { this.tableName = tableName; } + /** Sets an explicit filesystem location for this partition. */ + public void setPath(Path path) { + this.path = path; + } + + /** + * Returns the path given to {@link #setPath} when one was set; otherwise builds the + * location as {@code new Path(tablePath, Warehouse.makePartPath(partSpec))}. + */ + public Path getLocation(Path tablePath, Map partSpec) throws MetaException { + if (this.path == null) { + return new Path(tablePath, Warehouse.makePartPath(partSpec)); + } + + return this.path; + } + @Override public String toString() { return tableName + ":" + partitionName; } + /** Two results are equal when {@link #compareTo} returns 0, i.e. same table name and partition name. */ + @Override + public boolean equals(Object other) { + if (other instanceof PartitionResult) { + if (0 == compareTo((PartitionResult)other)) { + return true; + } + } + return false; + } + + /** Hashes the same two fields that {@link #compareTo} compares, so equal results share a hash code. */ + @Override + public int hashCode() { + return java.util.Objects.hash(tableName, partitionName); + } + public int compareTo(PartitionResult o) { int ret = tableName.compareTo(o.tableName); return ret != 0 ? ret : partitionName.compareTo(o.partitionName); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java index 6f4400a8ef..bac0e97d5a 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java @@ -318,11 +318,14 @@ void checkTable(Table table, PartitionIterable parts, continue; } fs = partPath.getFileSystem(conf); + + CheckResult.PartitionResult prFromMetastore = new CheckResult.PartitionResult(); + prFromMetastore.setPartitionName(getPartitionName(table, partition)); + prFromMetastore.setTableName(partition.getTableName()); if (!fs.exists(partPath)) { - CheckResult.PartitionResult pr = new CheckResult.PartitionResult(); - pr.setPartitionName(getPartitionName(table, partition)); - 
pr.setTableName(partition.getTableName()); - result.getPartitionsNotOnFs().add(pr); + result.getPartitionsNotOnFs().add(prFromMetastore); + } else { + result.getCorrectPartitions().add(prFromMetastore); } if (partitionExpirySeconds > 0) { @@ -401,7 +404,18 @@ void findUnknownPartitions(Table table, Set partPaths, CheckResult.PartitionResult pr = new CheckResult.PartitionResult(); pr.setPartitionName(partitionName); pr.setTableName(table.getTableName()); - + // Also record the actual partition path here, because a path built with Warehouse.makePartPath always has + // lowercase keys/path. Even if the new partition were added with lowercase keys, get queries on that partition + // would not return any results. + pr.setPath(partPath); + + // Check whether the partition already exists. There is no need to check partitions that are present in the db + // but not on the fs, because msck will override the partition location in the db. + if (result.getCorrectPartitions().contains(pr)) { + throw new MetastoreException("The partition '" + pr.toString() + "' already exists in db"); + } else if (result.getPartitionsNotInMs().contains(pr)) { + throw new MetastoreException("Found two paths for same partition '" + pr.toString() + "'"); + } result.getPartitionsNotInMs().add(pr); } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java index f4e109d1b0..ab0c649f90 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/Msck.java @@ -373,7 +373,7 @@ public Void execute(int size) throws MetastoreException { continue; } Map partSpec = Warehouse.makeSpecFromName(part.getPartitionName()); - Path location = new Path(tablePath, Warehouse.makePartPath(partSpec)); + Path location = part.getLocation(tablePath, partSpec); Partition partition = 
MetaStoreServerUtils.createMetaPartitionObject(table, partSpec, location); partition.setWriteId(table.getWriteId()); partsToAdd.add(partition); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java index 92d10cd0e1..f9d7c13e1b 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java @@ -1449,12 +1449,16 @@ public static String getPartitionName(Path tablePath, Path partitionPath, Set