diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index c64bc8c72c..f3f205decc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -505,7 +505,7 @@ public String toString() { /** * Get the base directory. - * @return the base directory to read + * @return the base directory to read or {@code null} if there is none (for given snapshot) */ Path getBaseDirectory(); diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 04ef7fc86a..fa5dbacd67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -188,7 +188,6 @@ private void overrideMRProps(JobConf job, Map properties) { } } } - /** * Run Compaction which may consist of several jobs on the cluster. 
 * @param conf Hive configuration file @@ -214,6 +213,11 @@ void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, false, true); List parsedDeltas = dir.getCurrentDirectories(); + if(parsedDeltas.size() == 1 && ci.type == CompactionType.MINOR) { + /*this means we have delta_0000018_0000018_0000/ (ignore base for Minor)*/ + assert dir.getOriginalFiles().size() == 0 : "These should've been obsolete"; + } + int maxDeltastoHandle = conf.getIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA); if(parsedDeltas.size() > maxDeltastoHandle) { /** diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java index af4a1da4dc..d651d17c29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java @@ -256,6 +256,10 @@ private CompactionType determineCompactionType(CompactionInfo ci, ValidTxnList t FileSystem fs = location.getFileSystem(conf); AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns, false, false); Path base = dir.getBaseDirectory(); + if(base == null && dir.getOriginalFiles().size() > 0) { + //if there are original files but no base we always want to do major to add ROW__IDs to the data + return CompactionType.MAJOR; + } long baseSize = 0; FileStatus stat = null; if (base != null) { @@ -267,11 +271,6 @@ private CompactionType determineCompactionType(CompactionInfo ci, ValidTxnList t baseSize = sumDirSize(fs, base); } - List originals = dir.getOriginalFiles(); - for (HdfsFileStatusWithId origStat : originals) { - baseSize += origStat.getFileStatus().getLen(); - } - long deltaSize = 0; List deltas = dir.getCurrentDirectories(); for (AcidUtils.ParsedDelta delta : deltas) {