diff --git a/ql/src/test/queries/clientpositive/msck_repair_5.q b/ql/src/test/queries/clientpositive/msck_repair_5.q new file mode 100644 index 0000000000..4b510bd29b --- /dev/null +++ b/ql/src/test/queries/clientpositive/msck_repair_5.q @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS repairtable_n5; + +CREATE TABLE repairtable_n5(key int) partitioned by (Year int, Month int, Value string); + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n5/Year=2020/Month=03/Value=Val1; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n5/Year=2020/Month=03/Value=Val1/datafile; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n5/Year=2020/Month=03/Value=Val2; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n5/Year=2020/Month=03/Value=Val2/datafile; + +MSCK REPAIR TABLE default.repairtable_n5; +SHOW PARTITIONS default.repairtable_n5; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n5/Year=2020/Month=03/Value=val3; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n5/Year=2020/Month=03/Value=val3/datafile; + +MSCK REPAIR TABLE default.repairtable_n5; +SHOW PARTITIONS default.repairtable_n5; + +DROP TABLE default.repairtable_n5; diff --git a/ql/src/test/results/clientpositive/msck_repair_5.q.out b/ql/src/test/results/clientpositive/msck_repair_5.q.out new file mode 100644 index 0000000000..25056782c9 --- /dev/null +++ b/ql/src/test/results/clientpositive/msck_repair_5.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable_n5(key int) partitioned by (Year int, Month int, Value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: CREATE TABLE repairtable_n5(key int) partitioned by (Year int, Month int, Value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n5 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n5 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n5 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n5 +Partitions not in metastore: repairtable_n5:year=2020/month=03/value=Val1 repairtable_n5:year=2020/month=03/value=Val2 +#### A masked pattern was here #### +PREHOOK: query: SHOW PARTITIONS default.repairtable_n5 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n5 +POSTHOOK: query: SHOW PARTITIONS default.repairtable_n5 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n5 +year=2020/month=03/value=Val1 +year=2020/month=03/value=Val2 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n5 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n5 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n5 +Partitions not in metastore: repairtable_n5:year=2020/month=03/value=val3 +#### A masked pattern was here #### +PREHOOK: query: SHOW PARTITIONS default.repairtable_n5 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n5 +POSTHOOK: query: SHOW PARTITIONS default.repairtable_n5 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n5 +year=2020/month=03/value=Val1 +year=2020/month=03/value=Val2 +year=2020/month=03/value=val3 +PREHOOK: query: DROP TABLE default.repairtable_n5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n5 +PREHOOK: Output: default@repairtable_n5 +POSTHOOK: query: DROP TABLE default.repairtable_n5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n5 +POSTHOOK: Output: default@repairtable_n5 diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java index 6f4400a8ef..9b6854988a 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java @@ -533,6 +533,42 @@ private void logOrThrowExceptionWithMsg(String msg) throws MetastoreException { } } + /** + * Get the normalized partition path. By converting partition column + * names in the partition path to lower case. + * + * @param tablePath + * Path of the table. + * @param partitionPath + * Path of the partition. + * @return normalized partition path, for example tablePath/PartitionDate=2008-01-01 + * gets converted to tablePath/partitiondate=2008-01-01 + */ + private Path getNormalizedPartitionPath(Path tablePath, Path partPath) { + String result = null; + Path currPath = partPath; + LOG.debug("tablePath:" + tablePath); + + while (currPath != null && !tablePath.equals(currPath)) { + String[] parts = currPath.getName().split("="); + if (parts.length > 0) { + if (parts.length != 2) { + LOG.warn(currPath.getName() + " is not a valid partition name"); + return partPath; + } + + if (result == null) { + result = parts[0].toLowerCase() + "=" + parts[1]; + } else { + result = parts[0].toLowerCase() + "=" + parts[1] + Path.SEPARATOR + result; + } + } + currPath = currPath.getParent(); + LOG.debug("currPath=" + currPath); + } + return new Path(tablePath + Path.SEPARATOR + result); + } + @VisibleForTesting void checkPartitionDirs(final ExecutorService executor, final Path basePath, final Set result, @@ -558,7 +594,7 @@ void checkPartitionDirs(final ExecutorService executor, while(!futures.isEmpty()) { Path p = futures.poll().get(); if (p != null) { - result.add(p); + result.add(getNormalizedPartitionPath(basePath, p)); } } //update the nextlevel with newly discovered sub-directories from the above