# Patch summary (leading commentary placed ahead of the first "diff --git" header).
#
# OrcInputFormat.java, class BISplitStrategy:
#   * Drops the `context` field and its assignment in the constructor. The
#     constructor still accepts a Context and passes context.numBuckets to super(...).
#   * getSplits(): previously built ONE OrcSplit per file, spanning offset 0 to
#     fileStatus.getLen(), using only the hosts of the first entry returned by
#     SHIMS.getLocationsWithOffset(fs, fileStatus). It now builds one OrcSplit per
#     entry of that map, taking the entry key as the split offset and the entry
#     value's getLength()/getHosts() as the split length and hosts.
#
# TestInputOutputFormat.java:
#   * Adds testBIStrategySplitBlockBoundary. It sets HIVE_ORC_SPLIT_STRATEGY to "BI"
#     and runs five scenarios of five mock files each, with byte arrays of
#     1, 1000, 1100, 2000 and 2200 bytes backed by 1, 1, 2, 2 and 3 MockBlocks,
#     asserting 5, 5, 10, 10 and 15 splits respectively.
#     (The second MockFile argument, 1000, is presumably the block size -- TODO confirm
#     against the MockFile constructor.)
#
# NOTE(review): generic type arguments appear to be missing in this copy
#   (`List deltas`, `TreeMap blockOffsets`, `Map.Entry entry`); the added code calls
#   entry.getValue().getLength(), which needs a typed entry. Presumably they were lost
#   in extraction -- confirm against the upstream patch.
# NOTE(review): the hunk headers declare multi-line hunks (-608,14 +608,12,
#   -627,11 +625,13, -540,6 +540,96) but this copy holds the content on a few long
#   physical lines; original line breaks must be restored before the patch can apply.
# NOTE(review): the test repeats the same setup five times and uses
#   assertEquals(true, x instanceof Y); a helper and assertTrue would be tidier.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 47e8b34..2d6ef9a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -608,14 +608,12 @@ public String toString() { boolean isOriginal; List deltas; FileSystem fs; - Context context; Path dir; public BISplitStrategy(Context context, FileSystem fs, Path dir, List fileStatuses, boolean isOriginal, List deltas, boolean[] covered) { super(dir, context.numBuckets, deltas, covered); - this.context = context; this.fileStatuses = fileStatuses; this.isOriginal = isOriginal; this.deltas = deltas; @@ -627,11 +625,13 @@ public BISplitStrategy(Context context, FileSystem fs, public List getSplits() throws IOException { List splits = Lists.newArrayList(); for (FileStatus fileStatus : fileStatuses) { - String[] hosts = SHIMS.getLocationsWithOffset(fs, fileStatus).firstEntry().getValue() - .getHosts(); - OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts, - null, isOriginal, true, deltas, -1); - splits.add(orcSplit); + TreeMap blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus); + for (Map.Entry entry : blockOffsets.entrySet()) { + OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(), + entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true, + deltas, -1); + splits.add(orcSplit); + } } // add uncovered ACID delta splits diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index c0fcedc..a345884 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -540,6 +540,96 @@ public void testFileGenerator() throws Exception { } + @Test + public 
void testBIStrategySplitBlockBoundary() throws Exception { + conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI"); + OrcInputFormat.Context context = new OrcInputFormat.Context(conf); + MockFileSystem fs = new MockFileSystem(conf, + new MockFile("mock:/a/b/part-00", 1000, new byte[1], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-01", 1000, new byte[1], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-02", 1000, new byte[1], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-03", 1000, new byte[1], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-04", 1000, new byte[1], new MockBlock("host1", "host2"))); + OrcInputFormat.FileGenerator gen = + new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); + OrcInputFormat.SplitStrategy splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy); + List splits = splitStrategy.getSplits(); + int numSplits = splits.size(); + assertEquals(5, numSplits); + + context = new OrcInputFormat.Context(conf); + fs = new MockFileSystem(conf, + new MockFile("mock:/a/b/part-00", 1000, new byte[1000], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-01", 1000, new byte[1000], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-02", 1000, new byte[1000], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-03", 1000, new byte[1000], new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-04", 1000, new byte[1000], new MockBlock("host1", "host2"))); + gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); + splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy); + splits = splitStrategy.getSplits(); + numSplits = splits.size(); + assertEquals(5, numSplits); + + context = new OrcInputFormat.Context(conf); + fs = new MockFileSystem(conf, + new MockFile("mock:/a/b/part-00", 
1000, new byte[1100], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-01", 1000, new byte[1100], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-02", 1000, new byte[1100], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-03", 1000, new byte[1100], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-04", 1000, new byte[1100], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"))); + gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); + splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy); + splits = splitStrategy.getSplits(); + numSplits = splits.size(); + assertEquals(10, numSplits); + + context = new OrcInputFormat.Context(conf); + fs = new MockFileSystem(conf, + new MockFile("mock:/a/b/part-00", 1000, new byte[2000], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-01", 1000, new byte[2000], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-02", 1000, new byte[2000], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-03", 1000, new byte[2000], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-04", 1000, new byte[2000], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"))); + gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); + splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy); + splits = splitStrategy.getSplits(); + numSplits = splits.size(); + assertEquals(10, numSplits); + + context = new OrcInputFormat.Context(conf); + fs = new MockFileSystem(conf, + new 
MockFile("mock:/a/b/part-00", 1000, new byte[2200], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"), new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-01", 1000, new byte[2200], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"), new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-02", 1000, new byte[2200], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"), new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-03", 1000, new byte[2200], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"), new MockBlock("host1", "host2")), + new MockFile("mock:/a/b/part-04", 1000, new byte[2200], new MockBlock("host1", "host2"), + new MockBlock("host1", "host2"), new MockBlock("host1", "host2"))); + gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); + splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy); + splits = splitStrategy.getSplits(); + numSplits = splits.size(); + assertEquals(15, numSplits); + } + public static class MockBlock { int offset; int length;