diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1b186f7..426dba4 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2849,8 +2849,14 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal TEZ_DYNAMIC_SEMIJOIN_REDUCTION("hive.tez.dynamic.semijoin.reduction", true, "When dynamic semijoin is enabled, shuffle joins will perform a leaky semijoin before shuffle. This " + "requires hive.tez.dynamic.partition.pruning to be enabled."), + TEZ_MIN_BLOOM_FILTER_ENTRIES("hive.tez.min.bloom.filter.entries", 1000000L, + "Bloom filter should be of at min certain size to be effective"), TEZ_MAX_BLOOM_FILTER_ENTRIES("hive.tez.max.bloom.filter.entries", 100000000L, "Bloom filter should be of at max certain size to be effective"), + TEZ_BLOOM_FILTER_FACTOR("hive.tez.bloom.filter.factor", (float) 2.0, + "Bloom filter should be a multiple of this factor with nDV"), + TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION("hive.tez.bigtable.minsize.semijoin.reduction", 1000000L, + "Big table for runtime filteting should be of atleast this size"), TEZ_SMB_NUMBER_WAVES( "hive.tez.smb.number.waves", (float) 0.5, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index c513ee5..727f7bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -507,6 +507,8 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator(); bloomFilterEval.setSourceOperator(selectOp); bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR)); bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval); aggs.add(min); aggs.add(max); @@ -603,6 +605,8 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator(); bloomFilterEval.setSourceOperator(selectOp); bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR)); bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval); aggsFinal.add(min); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 47b229f..468e18e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -102,10 +102,9 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // after the stats phase we might have some cyclic dependencies that we need - // to take care of. - runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + // Remove any parallel edge between semijoin and mapjoin. + removeSemijoinsParallelToMapJoin(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); // Remove semijoin optimization if it creates a cycle with mapside joins @@ -122,6 +121,12 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, removeSemiJoinIfNoStats(procCtx); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + // after the stats phase we might have some cyclic dependencies that we need + // to take care of. + runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + // need a new run of the constant folding because we might have created lots // of "and true and true" conditions. // Rather than run the full constant folding just need to shortcut AND/OR expressions @@ -781,6 +786,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, break; } } + + // Check if big table is big enough that runtime filtering is + // worth it. + if (ts.getStatistics() != null) { + long numRows = ts.getStatistics().getNumRows(); + if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { + removeSemiJoin = true; + } + } + if (removeSemiJoin) { // The stats are not annotated, remove the semijoin operator GenTezUtils.removeBranch(rs); @@ -810,4 +825,111 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) GraphWalker ogw = new PreOrderOnceWalker(disp); ogw.startWalking(topNodes, null); } + + private boolean findParallelSemiJoinBranch(Operator mapjoin, TableScanOperator bigTableTS, + ParseContext parseContext, + Map semijoins) { + + boolean parallelEdges = false; + for (Operator op : mapjoin.getParentOperators()) { + if (!(op instanceof ReduceSinkOperator)) { + continue; + } + + op = op.getParentOperators().get(0); + + // Follow the Reducesink operator upstream which is on small table side. + while (!(op instanceof ReduceSinkOperator) && + !(op instanceof TableScanOperator) && + !(op.getChildren() != null && op.getChildren().size() > 1)) { + op = op.getParentOperators().get(0); + } + + // Bail out if RS or TS is encountered. + if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) { + continue; + } + + // A branch is hit. + for (Node nd : op.getChildren()) { + if (nd instanceof SelectOperator) { + Operator child = (Operator) nd; + + while (child.getChildOperators().size() > 0) { + child = child.getChildOperators().get(0); + } + + // If not ReduceSink Op, skip + if (!(child instanceof ReduceSinkOperator)) { + continue; + } + + ReduceSinkOperator rs = (ReduceSinkOperator) child; + TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs); + if (ts == null || ts != bigTableTS) { + // skip, no semijoin or not the one we are looking for. + continue; + } + + // Add the semijoin branch to the map + semijoins.put(rs, ts); + parallelEdges = true; + } + } + } + return parallelEdges; + } + + /* + * The algorithm looks at all the mapjoins in the operator pipeline until + * it hits RS Op and for each mapjoin examines if it has paralllel semijoin + * edge. + */ + private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) + throws SemanticException { + if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || + !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN)) { + // Not needed without semi-join reduction + return; + } + + // Get all the TS ops. + List> topOps = new ArrayList<>(); + topOps.addAll(procCtx.parseContext.getTopOps().values()); + + Map semijoins = new HashMap<>(); + for (Operator parent : topOps) { + // A TS can have multiple branches due to DPP Or Semijoin Opt. + // USe DFS to traverse all the branches until RS is hit. + Deque> deque = new LinkedList<>(); + deque.add(parent); + while (!deque.isEmpty()) { + Operator op = deque.poll(); + if (op instanceof ReduceSinkOperator) { + // Done with this branch + continue; + } + + if (op instanceof MapJoinOperator) { + // A candidate. + if (!findParallelSemiJoinBranch(op, (TableScanOperator) parent, + procCtx.parseContext, semijoins)) { + // No parallel edge was found for the given mapjoin op, + // no need to go down further, skip this TS operator pipeline. + break; + } + } + deque.addAll(op.getChildOperators()); + } + } + + if (semijoins.size() > 0) { + for (ReduceSinkOperator rs : semijoins.keySet()) { + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, + semijoins.get(rs)); + } + } + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java index 788aace..2b84beb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java @@ -73,6 +73,8 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticE // Source operator to get the number of entries private SelectOperator sourceOperator; private long maxEntries = 0; + private long minEntries = 0; + private float factor = 1; // ObjectInspector for input data. private PrimitiveObjectInspector inputOI; @@ -278,6 +280,9 @@ public long getExpectedEntries() { } } + // Update expectedEntries based on factor and minEntries configurations + expectedEntries = (long) (expectedEntries * factor); + expectedEntries = expectedEntries > minEntries ? expectedEntries : minEntries; return expectedEntries; } @@ -293,6 +298,21 @@ public void setMaxEntries(long maxEntries) { this.maxEntries = maxEntries; } + public void setMinEntries(long minEntries) { + this.minEntries = minEntries; + } + + public long getMinEntries() { + return minEntries; + } + + public void setFactor(float factor) { + this.factor = factor; + } + + public float getFactor() { + return factor; + } @Override public String getExprString() { return "expectedEntries=" + getExpectedEntries(); diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q index d28da6e..9145a36 100644 --- a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q @@ -6,7 +6,8 @@ set hive.ppd.remove.duplicatefilters=true; set hive.tez.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; - +set hive.tez.min.bloom.filter.entries=1; +set hive.tez.bigtable.minsize.semijoin.reduction=1; select distinct ds from srcpart; select distinct hr from srcpart; diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index 5482cdb..f04a923 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -8,6 +8,8 @@ set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.stats.autogather=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; -- Create Tables create table alltypesorc_int ( cint int, cstring string ) stored as ORC; @@ -67,7 +69,7 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); ---set hive.tez.dynamic.semijoin.reduction=false; +set hive.tez.dynamic.semijoin.reduction=false; -- With Mapjoins. set hive.auto.convert.join=true; @@ -79,6 +81,15 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +set hive.tez.dynamic.semijoin.reduction=false; + +-- multiple sources, different keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +--set hive.tez.dynamic.semijoin.reduction=false; -- With unions explain select * from alltypesorc_int join diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q index 2306395..88386a6 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q @@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; CREATE TABLE `table_1`( `bigint_col_7` bigint, diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q index 01650f8..d5fe136 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q @@ -12,6 +12,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; -- Try with merge statements create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); diff --git a/ql/src/test/queries/clientpositive/mergejoin.q b/ql/src/test/queries/clientpositive/mergejoin.q index eecd105..381f253 100644 --- a/ql/src/test/queries/clientpositive/mergejoin.q +++ b/ql/src/test/queries/clientpositive/mergejoin.q @@ -9,6 +9,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.vectorized.execution.enabled=true; +set hive.tez.min.bloom.filter.entries=1; +set hive.tez.bigtable.minsize.semijoin.reduction=1; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q index d2ded71..cb0e279 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q @@ -8,7 +8,8 @@ set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; - +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; select distinct ds from srcpart; select distinct hr from srcpart; diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q index 68b57ea..f7c1f3b 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q @@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; set hive.vectorized.adaptor.usage.mode=none; set hive.vectorized.execution.enabled=true; diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q index be8e4af..4bdff42 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q @@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; set hive.vectorized.adaptor.usage.mode=none; set hive.vectorized.execution.enabled=true; diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 96d998f..e514e2e 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -3224,7 +3224,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -3270,7 +3270,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index d32cb5c..235fed0 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -327,7 +327,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -375,7 +375,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -744,7 +744,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -757,7 +757,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -790,7 +790,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -840,7 +840,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -852,7 +852,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -864,7 +864,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1080,7 +1080,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1093,7 +1093,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=214) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1141,7 +1141,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1153,7 +1153,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1410,7 +1410,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=214) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1443,7 +1443,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1507,7 +1507,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1519,7 +1519,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1568,19 +1568,18 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -1624,19 +1623,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1654,18 +1640,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1705,19 +1679,18 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -1761,12 +1734,300 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Map 3 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1791,11 +2052,11 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1810,8 +2071,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_date@ds=2008-04-08 PREHOOK: Input: default@srcpart_date@ds=2008-04-09 @@ -1819,8 +2081,9 @@ PREHOOK: Input: default@srcpart_small PREHOOK: Input: default@srcpart_small@ds=2008-04-08 PREHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 @@ -1828,7 +2091,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +0 PREHOOK: query: explain select * from alltypesorc_int join (select srcpart_date.key as key from srcpart_date union all @@ -1848,20 +2111,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE), Union 3 (BROADCAST_EDGE) + Map 1 <- Union 3 (BROADCAST_EDGE) Map 2 <- Union 3 (CONTAINS) - Map 5 <- Union 3 (CONTAINS) - Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Map 4 <- Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: cstring is not null (type: boolean) Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: cstring is not null (type: boolean) Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring (type: string) @@ -1904,22 +2166,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: srcpart_small @@ -1937,33 +2186,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out index 5f75977..d291e7d 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out @@ -184,7 +184,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -230,7 +230,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index c272fc1..8950b70 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -82,7 +82,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -235,7 +235,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -353,7 +353,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -406,7 +406,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -501,7 +501,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=4) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -551,7 +551,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=4) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -704,7 +704,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=4) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index 186600b..c1fde0b 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -63,7 +63,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -95,7 +95,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -313,7 +313,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -381,7 +381,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1426,7 +1426,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1493,7 +1493,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1586,7 +1586,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=500) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1634,7 +1634,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=500) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1823,7 +1823,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1876,7 +1876,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1940,7 +1940,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -1952,7 +1952,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -2026,7 +2026,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -2094,7 +2094,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -2216,7 +2216,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2267,7 +2267,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2313,7 +2313,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1016) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2372,7 +2372,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -2440,7 +2440,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -2516,7 +2516,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2569,7 +2569,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=14) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=28) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -2633,7 +2633,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2645,7 +2645,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=14) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=28) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -2769,7 +2769,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2820,7 +2820,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2866,7 +2866,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1016) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -2973,7 +2973,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE @@ -3019,7 +3019,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index c534cb5..6a9a76c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -3597,7 +3597,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -3657,7 +3657,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index e62d913..10fc0f3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -132,7 +132,7 @@ STAGE PLANS: projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator @@ -225,7 +225,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -369,7 +369,7 @@ STAGE PLANS: projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary className: VectorGroupByOperator @@ -462,7 +462,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -606,7 +606,7 @@ STAGE PLANS: projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary className: VectorGroupByOperator @@ -699,7 +699,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -844,7 +844,7 @@ STAGE PLANS: projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator @@ -914,7 +914,7 @@ STAGE PLANS: projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator @@ -1009,7 +1009,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -1038,7 +1038,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -1183,7 +1183,7 @@ STAGE PLANS: projectedOutputColumns: [0] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary className: VectorGroupByOperator @@ -1210,7 +1210,7 @@ STAGE PLANS: projectedOutputColumns: [1] Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator @@ -1303,7 +1303,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -1332,7 +1332,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -1476,7 +1476,7 @@ STAGE PLANS: projectedOutputColumns: [1] Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=29) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=58) Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator @@ -1569,7 +1569,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=29) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=58) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator @@ -1685,7 +1685,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=47) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=94) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE @@ -1733,7 +1733,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=47) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=94) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index d9fd706..a03466f 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -152,7 +152,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=18) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=36) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -200,7 +200,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=18) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=36) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -288,7 +288,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=16) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE @@ -336,7 +336,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=16) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE @@ -424,7 +424,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=60) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -472,7 +472,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=60) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -560,7 +560,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE @@ -608,7 +608,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE @@ -696,7 +696,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE @@ -744,7 +744,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE @@ -832,7 +832,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -880,7 +880,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -968,7 +968,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE @@ -1016,7 +1016,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE @@ -1104,7 +1104,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE @@ -1152,7 +1152,7 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE