diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 33fe3b6..087207b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -155,6 +155,7 @@ private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; + private static final int DEFAULT_ETL_FILE_THRESHOLD = 100; /** * When picking the hosts for a split that crosses block boundaries, @@ -510,7 +511,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, private final int splitStrategyBatchMs; private final long maxSize; private final long minSize; - private final int minSplits; + private final int etlFileThreshold; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final boolean forceThreadpool; @@ -555,7 +556,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, cacheStripeDetails = (cacheStripeDetailsSize > 0); - this.minSplits = Math.min(cacheStripeDetailsSize, minSplits); + this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits; synchronized (Context.class) { if (threadPool == null) { @@ -1938,7 +1939,7 @@ private static boolean isStripeSatisfyPredicate( deltas, covered, isOriginal, ugi, allowSyntheticFileIds); default: // HYBRID strategy - if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) { + if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) { return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseOrOriginalFiles, deltas, covered, isOriginal, ugi, allowSyntheticFileIds); } else { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 4eb0249..3d8e51a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -524,6 +524,27 @@ public void testSplitStrategySelection() throws Exception { } } } + + k = 0; + conf.set("hive.orc.cache.stripe.details.size", "-1"); + for (int c : counts) { + for (int s : sizes) { + final FileSystem fs = generateMockFiles(c, s); + for (int n : numSplits) { + final OrcInputFormat.Context context = new OrcInputFormat.Context( + conf, n); + OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( + context, fs, new MockPath(fs, "mock:/a/b"), false, null); + final SplitStrategy splitStrategy = createSplitStrategy(context, gen); + assertTrue( + String.format( + "Split strategy for %d files x %d size for %d splits", c, s, + n), + splitStrategy.getClass().getSimpleName() + .equals(strategyResults[k++])); + } + } + } } @Test