diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java index dfc778a..ecd4ddc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ColumnarSplitSizeEstimator.java @@ -42,7 +42,6 @@ public long getEstimatedSize(InputSplit inputSplit) throws IOException { if (isDebugEnabled) { LOG.debug("Estimated column projection size: " + colProjSize); } - return colProjSize; } else if (inputSplit instanceof HiveInputFormat.HiveInputSplit) { InputSplit innerSplit = ((HiveInputFormat.HiveInputSplit) inputSplit).getInputSplit(); @@ -51,9 +50,12 @@ public long getEstimatedSize(InputSplit inputSplit) throws IOException { if (isDebugEnabled) { LOG.debug("Estimated column projection size: " + colProjSize); } - return colProjSize; } } + if (colProjSize <= 0) { + /* columnar splits of unknown size - estimate worst-case */ + return Integer.MAX_VALUE; + } return colProjSize; } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 4eb0249..c1ef0e7 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper; +import org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -556,6 +557,28 @@ public void testFileGenerator() throws Exception { } @Test + public void testACIDSplitStrategy() throws Exception { + conf.set("bucket_count", "2"); + OrcInputFormat.Context context = new OrcInputFormat.Context(conf); + MockFileSystem fs = new MockFileSystem(conf, + new MockFile("mock:/a/delta_000_001/part-00", 1000, new byte[1], new MockBlock("host1")), + new MockFile("mock:/a/delta_000_001/part-01", 1000, new byte[1], new MockBlock("host1")), + new MockFile("mock:/a/delta_001_002/part-02", 1000, new byte[1], new MockBlock("host1")), + new MockFile("mock:/a/delta_001_002/part-03", 1000, new byte[1], new MockBlock("host1"))); + OrcInputFormat.FileGenerator gen = + new OrcInputFormat.FileGenerator(context, fs, + new MockPath(fs, "mock:/a"), false, null); + OrcInputFormat.SplitStrategy splitStrategy = createSplitStrategy(context, gen); + assertEquals(true, splitStrategy instanceof OrcInputFormat.ACIDSplitStrategy); + List splits = splitStrategy.getSplits(); + ColumnarSplitSizeEstimator splitSizeEstimator = new ColumnarSplitSizeEstimator(); + for (OrcSplit split: splits) { + assertEquals(Integer.MAX_VALUE, splitSizeEstimator.getEstimatedSize(split)); + } + assertEquals(2, splits.size()); + } + + @Test public void testBIStrategySplitBlockBoundary() throws Exception { conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI"); OrcInputFormat.Context context = new OrcInputFormat.Context(conf);