diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 48e8491..d69cb09 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2724,6 +2724,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "directories that are partition-like but contain unsupported characters. 'throw' (an " +
         "exception) is the default; 'skip' will skip the invalid directories and still repair the" +
         " others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"),
+    HIVE_MSCK_REPAIR_BATCH_SIZE("hive.msck.repair.batch.size", 10000,
+        "Batch size for the msck repair command: missing partitions are added to the metastore in batches of this size"),
     HIVE_SERVER2_LLAP_CONCURRENT_QUERIES("hive.server2.llap.concurrent.queries", -1,
         "The number of queries allowed in parallel via llap. Negative number implies 'infinite'."),
     HIVE_TEZ_ENABLE_MEMORY_MANAGER("hive.tez.enable.memory.manager", true,
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index a81eb18..318807a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -111,6 +111,7 @@
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
 import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
 import org.apache.hadoop.hive.ql.metadata.CheckResult;
+import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker;
@@ -1823,12 +1824,30 @@ private int msck(Hive db, MsckDesc msckDesc) {
       AddPartitionDesc apd = new AddPartitionDesc(
           table.getDbName(), table.getTableName(), false);
       try {
-        for (CheckResult.PartitionResult part : partsNotInMs) {
-          apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
-          repairOutput.add("Repair: Added partition to metastore "
-              + msckDesc.getTableName() + ':' + part.getPartitionName());
+        int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE);
+        if (partsNotInMs.size() > batchSize) {
+          while (!partsNotInMs.isEmpty()) {
+            Iterator<PartitionResult> iterator = partsNotInMs.iterator();
+            apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), false);
+            List<PartitionResult> partList = new ArrayList<>();
+            for (int i = 1; iterator.hasNext() && i <= batchSize; i++) {
+              PartitionResult part = iterator.next();
+              apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
+              repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName()
+                  + ':' + part.getPartitionName());
+              partList.add(part);
+            }
+            db.createPartitions(apd);
+            partsNotInMs.removeAll(partList);
+          }
+        } else {
+          for (CheckResult.PartitionResult part : partsNotInMs) {
+            apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
+            repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName()
+                + ':' + part.getPartitionName());
+          }
+          db.createPartitions(apd);
         }
-        db.createPartitions(apd);
       } catch (Exception e) {
         LOG.info("Could not bulk-add partitions to metastore; trying one by one", e);
         repairOutput.clear();
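
For reviewers, here is the batching pattern from the DDLTask hunk reduced to a minimal,
self-contained sketch. It is illustrative only: MsckBatchSketch, createPartitions, and
addPartitionsBatched below are stand-in names, not Hive APIs. Unlike the patch, the sketch
always batches, which also covers the small-list case without a duplicated else branch.

import java.util.ArrayList;
import java.util.List;

public class MsckBatchSketch {

  // Stand-in for db.createPartitions(apd): here it only reports the batch size.
  static void createPartitions(List<String> batch) {
    System.out.println("bulk-adding " + batch.size() + " partitions");
  }

  // Adds all entries of partsNotInMs in batches of at most batchSize,
  // issuing one bulk call per batch instead of one call per partition.
  static void addPartitionsBatched(List<String> partsNotInMs, int batchSize) {
    List<String> batch = new ArrayList<>(batchSize);
    for (String part : partsNotInMs) {
      batch.add(part);
      if (batch.size() == batchSize) {
        createPartitions(batch);   // flush a full batch
        batch.clear();
      }
    }
    if (!batch.isEmpty()) {
      createPartitions(batch);     // flush the final, partial batch
    }
  }

  public static void main(String[] args) {
    List<String> parts = new ArrayList<>();
    for (int i = 1; i <= 25; i++) {
      parts.add(String.format("ds=2016-01-%02d", i));
    }
    addPartitionsBatched(parts, 10); // prints batches of 10, 10 and 5
  }
}

Flushing a fixed-size buffer avoids both extremes the patch is aimed at: one metastore round
trip per partition, and a single unbounded bulk call that can overwhelm the metastore when a
table is missing a very large number of partitions.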