diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
index 6805c17a116f5ef0febd36c59d454fa631ae0024..84c090239df39d7ea987d561bf4ab1e852f75624 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
@@ -19,6 +19,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
@@ -35,7 +36,10 @@
 import java.util.concurrent.ThreadPoolExecutor;
 
 import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.common.StringInternUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileStatus;
@@ -49,8 +53,6 @@
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult;
-import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.thrift.TException;
 
 import com.google.common.util.concurrent.MoreExecutors;
@@ -64,6 +66,7 @@ public class HiveMetaStoreChecker {
 
   public static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreChecker.class);
+  public static final String CLASS_NAME = HiveMetaStoreChecker.class.getName();
 
   private final Hive hive;
   private final HiveConf conf;
@@ -208,19 +211,28 @@ void checkTable(String dbName, String tableName,
       return;
     }
 
-    List<Partition> parts = new ArrayList<Partition>();
+    PartitionIterable parts;
     boolean findUnknownPartitions = true;
 
     if (table.isPartitioned()) {
       if (partitions == null || partitions.isEmpty()) {
-        PrunedPartitionList prunedPartList =
-            PartitionPruner.prune(table, null, conf, toString(), null);
-        // no partitions specified, let's get all
-        parts.addAll(prunedPartList.getPartitions());
+        String mode = HiveConf.getVar(conf, ConfVars.HIVEMAPREDMODE, (String) null);
+        if ("strict".equalsIgnoreCase(mode)) {
+          parts = new PartitionIterable(hive, table, null, conf.getIntVar(
+              HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
+        } else {
+          List<Partition> loadedPartitions = new ArrayList<>();
+          PerfLogger perfLogger = SessionState.getPerfLogger();
+          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
+          loadedPartitions.addAll(hive.getAllPartitionsOf(table));
+          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
+          parts = new PartitionIterable(loadedPartitions);
+        }
       } else {
         // we're interested in specific partitions,
         // don't check for any others
         findUnknownPartitions = false;
+        List<Partition> loadedPartitions = new ArrayList<>();
         for (Map<String, String> map : partitions) {
           Partition part = hive.getPartition(table, map, false);
           if (part == null) {
@@ -229,10 +241,13 @@
             pr.setPartitionName(Warehouse.makePartPath(map));
             result.getPartitionsNotInMs().add(pr);
           } else {
-            parts.add(part);
+            loadedPartitions.add(part);
           }
         }
+        parts = new PartitionIterable(loadedPartitions);
       }
+    } else {
+      parts = new PartitionIterable(Collections.<Partition>emptyList());
     }
 
     checkTable(table, parts, findUnknownPartitions, result);
@@ -255,7 +270,7 @@ void checkTable(String dbName, String tableName,
    * @throws HiveException
    *           Could not create Partition object
    */
-  void checkTable(Table table, List<Partition> parts,
+  void checkTable(Table table, PartitionIterable parts,
       boolean findUnknownPartitions, CheckResult result) throws IOException,
       HiveException {
@@ -284,7 +299,9 @@ void checkTable(Table table, List<Partition> parts,
       }
 
       for (int i = 0; i < partition.getSpec().size(); i++) {
-        partPaths.add(partPath.makeQualified(fs));
+        Path qualifiedPath = partPath.makeQualified(fs);
+        StringInternUtils.internUriStringsInPath(qualifiedPath);
+        partPaths.add(qualifiedPath);
         partPath = partPath.getParent();
       }
     }
diff --git ql/src/test/queries/clientpositive/msck_repair_0.q ql/src/test/queries/clientpositive/msck_repair_0.q
index ce8ef426a2a58845afc8333259d66725db416584..22542331621ca4ce5277c2f46a4264b7540a4d1e 100644
--- ql/src/test/queries/clientpositive/msck_repair_0.q
+++ ql/src/test/queries/clientpositive/msck_repair_0.q
@@ -16,4 +16,11 @@ MSCK REPAIR TABLE default.repairtable;
 
 MSCK TABLE repairtable;
 
+set hive.mapred.mode=strict;
+
+dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable/p1=e/p2=f/p3=g;
+dfs -touchz ${system:test.warehouse.dir}/repairtable/p1=e/p2=f/p3=g/datafile;
+
+MSCK REPAIR TABLE default.repairtable;
+
 DROP TABLE default.repairtable;
diff --git ql/src/test/results/clientpositive/msck_repair_0.q.out ql/src/test/results/clientpositive/msck_repair_0.q.out
index 3f2fe75b194f1248bd5c073dd7db6b71b2ffc2ba..2e0d9dc423071ebbd9a55606f196cf7752e27b1a 100644
--- ql/src/test/results/clientpositive/msck_repair_0.q.out
+++ ql/src/test/results/clientpositive/msck_repair_0.q.out
@@ -37,6 +37,14 @@ PREHOOK: Output: default@repairtable
 POSTHOOK: query: MSCK TABLE repairtable
 POSTHOOK: type: MSCK
 POSTHOOK: Output: default@repairtable
+PREHOOK: query: MSCK REPAIR TABLE default.repairtable
+PREHOOK: type: MSCK
+PREHOOK: Output: default@repairtable
+POSTHOOK: query: MSCK REPAIR TABLE default.repairtable
+POSTHOOK: type: MSCK
+POSTHOOK: Output: default@repairtable
+Partitions not in metastore:	repairtable:p1=e/p2=f
+Repair: Added partition to metastore default.repairtable:p1=e/p2=f
 PREHOOK: query: DROP TABLE default.repairtable
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@repairtable
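
Note (not part of the patch): the heart of this change is replacing the eagerly materialized List<Partition> with PartitionIterable. In strict mode the iterable pulls partitions from the metastore lazily, in batches of METASTORE_BATCH_RETRIEVE_MAX, so MSCK never holds every Partition object in memory at once; the non-strict branch still fetches everything in one getAllPartitionsOf() call, now timed by the PerfLogger. Below is a minimal, generic sketch of that batched-iteration pattern, plain Java rather than Hive code: BatchedIterable, keys, and fetcher are hypothetical names, not Hive APIs.

// BatchedIterable is a hypothetical, generic sketch of the pattern behind
// PartitionIterable's lazy mode: the (cheap) keys are known up front, the
// (expensive) values are fetched batchSize at a time during iteration, so
// the full value list is never materialized in memory.
import java.util.ArrayDeque;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.function.Function;

class BatchedIterable<K, V> implements Iterable<V> {
  private final List<K> keys;                       // e.g. partition names
  private final int batchSize;                      // e.g. METASTORE_BATCH_RETRIEVE_MAX
  private final Function<List<K>, List<V>> fetcher; // e.g. one metastore call per batch

  BatchedIterable(List<K> keys, int batchSize, Function<List<K>, List<V>> fetcher) {
    this.keys = keys;
    this.batchSize = batchSize;
    this.fetcher = fetcher;
  }

  @Override
  public Iterator<V> iterator() {
    return new Iterator<V>() {
      private int nextKey = 0;                       // index of first unfetched key
      private final Queue<V> buffered = new ArrayDeque<>();

      @Override
      public boolean hasNext() {
        // Refill the buffer one batch at a time; loop in case a batch comes back empty.
        while (buffered.isEmpty() && nextKey < keys.size()) {
          int end = Math.min(nextKey + batchSize, keys.size());
          buffered.addAll(fetcher.apply(keys.subList(nextKey, end)));
          nextKey = end;
        }
        return !buffered.isEmpty();
      }

      @Override
      public V next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        return buffered.poll();
      }
    };
  }
}

With keys bound to partition names and fetcher bound to one metastore round trip per sublist, iteration keeps at most batchSize values resident at a time. That is the trade-off the strict branch makes: more round trips, bounded memory, versus the single bulk fetch of the non-strict branch.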