diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 2d6ef9a..9ac34b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -126,6 +126,7 @@ private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; + private static final int DEFAULT_ETL_FILE_THRESHOLD = 100; private static final PerfLogger perfLogger = PerfLogger.getPerfLogger(); private static final String CLASS_NAME = ReaderImpl.class.getName(); @@ -434,7 +435,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, private final int numBuckets; private final long maxSize; private final long minSize; - private final int minSplits; + private final int etlFileThreshold; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final AtomicInteger cacheHitCounter = new AtomicInteger(0); @@ -469,7 +470,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, cacheStripeDetails = (cacheStripeDetailsSize > 0); - this.minSplits = Math.min(cacheStripeDetailsSize, minSplits); + this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits; synchronized (Context.class) { if (threadPool == null) { @@ -748,7 +749,7 @@ public SplitStrategy call() throws IOException { break; default: // HYBRID strategy - if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) { + if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) { splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); } else { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index c0d912d..fa32bf6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -509,6 +509,27 @@ public void testSplitStrategySelection() throws Exception { } } } + + k = 0; + conf.set("hive.orc.cache.stripe.details.size", "-1"); + for (int c : counts) { + for (int s : sizes) { + final FileSystem fs = generateMockFiles(c, s); + for (int n : numSplits) { + final OrcInputFormat.Context context = new OrcInputFormat.Context( + conf, n); + OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( + context, fs, new MockPath(fs, "mock:/a/b")); + final SplitStrategy splitStrategy = gen.call(); + assertTrue( + String.format( + "Split strategy for %d files x %d size for %d splits", c, s, + n), + splitStrategy.getClass().getSimpleName() + .equals(strategyResults[k++])); + } + } + } } @Test