diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
index 25177ef..db8d0e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
@@ -155,6 +155,11 @@ public void readFields(DataInput in) throws IOException {
         stmtIds.add(in.readInt());
       }
     }
+
+    @Override
+    public String toString() {
+      return "{ minTxnId: " + minTxnId + " maxTxnId: " + maxTxnId + " stmtIds: " + stmtIds + " }";
+    }
   }
   /**
    * Options for controlling the record readers.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 0813033..bfd1aec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1227,7 +1227,6 @@ private AcidDirInfo callInternal() throws IOException {
     private OrcFile.WriterVersion writerVersion;
     private long projColsUncompressedSize;
     private List<OrcSplit> deltaSplits;
-    private final SplitInfo splitInfo;
     private final ByteBuffer ppdResult;
     private final UserGroupInformation ugi;
     private final boolean allowSyntheticFileIds;
@@ -1250,7 +1249,6 @@ public SplitGenerator(SplitInfo splitInfo, UserGroupInformation ugi,
       this.hasBase = splitInfo.hasBase;
       this.projColsUncompressedSize = -1;
       this.deltaSplits = splitInfo.getSplits();
-      this.splitInfo = splitInfo;
       this.allowSyntheticFileIds = allowSyntheticFileIds;
       this.ppdResult = splitInfo.ppdResult;
     }
@@ -1464,12 +1462,9 @@ public String toString() {
       // 2) delete all rows
      // 3) major compaction
      // 4) insert some rows
-      // In such cases, consider base files without any stripes as uncovered delta
+      // In such cases, consider the entire base file as a single orc split (similar to what BI strategy does)
       if (stripes == null || stripes.isEmpty()) {
-        AcidOutputFormat.Options options = AcidUtils.parseBaseOrDeltaBucketFilename(file.getPath(), context.conf);
-        int bucket = options.getBucket();
-        splitInfo.covered[bucket] = false;
-        deltaSplits = splitInfo.getSplits();
+        splits.add(createSplit(0, file.getLen(), orcTail));
       } else {
         // if we didn't have predicate pushdown, read everything
         if (includeStripe == null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
index d61b24b..998cbc0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -225,6 +225,6 @@ public boolean canUseLlapIo() {
   public String toString() {
     return "OrcSplit [" + getPath() + ", start=" + getStart() + ", length=" + getLength() +
         ", isOriginal=" + isOriginal + ", fileLength=" + fileLen + ", hasFooter=" + hasFooter +
-        ", hasBase=" + hasBase + ", deltas=" + (deltas == null ? 0 : deltas.size()) + "]";
+        ", hasBase=" + hasBase + ", deltas=" + deltas + "]";
   }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index a14ff5d..d5d0d27 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -3087,7 +3087,7 @@ public void testNonVectorReaderNoFooterSerialize() throws Exception {
       assertTrue(split.toString().contains("start=3"));
       assertTrue(split.toString().contains("hasFooter=false"));
       assertTrue(split.toString().contains("hasBase=true"));
-      assertTrue(split.toString().contains("deltas=0"));
+      assertTrue(split.toString().contains("deltas=[]"));
       if (split instanceof OrcSplit) {
         assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3159,7 +3159,7 @@ public void testNonVectorReaderFooterSerialize() throws Exception {
       assertTrue(split.toString().contains("start=3"));
       assertTrue(split.toString().contains("hasFooter=true"));
       assertTrue(split.toString().contains("hasBase=true"));
-      assertTrue(split.toString().contains("deltas=0"));
+      assertTrue(split.toString().contains("deltas=[]"));
       if (split instanceof OrcSplit) {
         assertTrue("Footer serialize test for non-vector reader, hasFooter is expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3232,7 +3232,7 @@ public void testVectorReaderNoFooterSerialize() throws Exception {
       assertTrue(split.toString().contains("start=3"));
       assertTrue(split.toString().contains("hasFooter=false"));
       assertTrue(split.toString().contains("hasBase=true"));
-      assertTrue(split.toString().contains("deltas=0"));
+      assertTrue(split.toString().contains("deltas=[]"));
       if (split instanceof OrcSplit) {
         assertFalse("No footer serialize test for vector reader, hasFooter is not expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3307,7 +3307,7 @@ public void testVectorReaderFooterSerialize() throws Exception {
       assertTrue(split.toString().contains("start=3"));
       assertTrue(split.toString().contains("hasFooter=true"));
       assertTrue(split.toString().contains("hasBase=true"));
-      assertTrue(split.toString().contains("deltas=0"));
+      assertTrue(split.toString().contains("deltas=[]"));
       if (split instanceof OrcSplit) {
         assertTrue("Footer serialize test for vector reader, hasFooter is expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3380,7 +3380,7 @@ public void testACIDReaderNoFooterSerialize() throws Exception {
       assertTrue(split.toString().contains("start=3"));
       assertTrue(split.toString().contains("hasFooter=false"));
       assertTrue(split.toString().contains("hasBase=true"));
-      assertTrue(split.toString().contains("deltas=0"));
+      assertTrue(split.toString().contains("deltas=[]"));
       if (split instanceof OrcSplit) {
         assertFalse("No footer serialize test for non-vector reader, hasFooter is not expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3457,7 +3457,7 @@ public void testACIDReaderFooterSerialize() throws Exception {
       assertTrue(split.toString().contains("start=3"));
       assertTrue(split.toString().contains("hasFooter=true"));
       assertTrue(split.toString().contains("hasBase=true"));
-      assertTrue(split.toString().contains("deltas=0"));
+      assertTrue(split.toString().contains("deltas=[]"));
       if (split instanceof OrcSplit) {
         assertTrue("Footer serialize test for ACID reader, hasFooter is expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3535,7 +3535,7 @@ public void testACIDReaderNoFooterSerializeWithDeltas() throws Exception {
       // NOTE: don't be surprised if deltas value is different
       // in older release deltas=2 as min and max transaction are added separately to delta list.
       // in newer release since both of them are put together deltas=1
-      assertTrue(split.toString().contains("deltas=1"));
+      assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]"));
       if (split instanceof OrcSplit) {
         assertFalse("No footer serialize test for ACID reader, hasFooter is not expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());
@@ -3614,7 +3614,7 @@ public void testACIDReaderFooterSerializeWithDeltas() throws Exception {
       // NOTE: don't be surprised if deltas value is different
       // in older release deltas=2 as min and max transaction are added separately to delta list.
       // in newer release since both of them are put together deltas=1
-      assertTrue(split.toString().contains("deltas=1"));
+      assertTrue(split.toString().contains("deltas=[{ minTxnId: 1 maxTxnId: 2 stmtIds: [] }]]"));
       if (split instanceof OrcSplit) {
         assertTrue("Footer serialize test for ACID reader, hasFooter is not expected in" +
           " orc splits.", ((OrcSplit) split).hasFooter());