diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 62908f9..68ef745 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2849,8 +2849,16 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
     TEZ_DYNAMIC_SEMIJOIN_REDUCTION("hive.tez.dynamic.semijoin.reduction", true,
         "When dynamic semijoin is enabled, shuffle joins will perform a leaky semijoin before shuffle. This " +
         "requires hive.tez.dynamic.partition.pruning to be enabled."),
+    TEZ_MIN_BLOOM_FILTER_ENTRIES("hive.tez.min.bloom.filter.entries", 1000000L,
+        "Bloom filter should be at least this size to be effective"),
     TEZ_MAX_BLOOM_FILTER_ENTRIES("hive.tez.max.bloom.filter.entries", 100000000L,
         "Bloom filter should be of at max certain size to be effective"),
+    TEZ_BLOOM_FILTER_FACTOR("hive.tez.bloom.filter.factor", (float) 2.0,
+        "Bloom filter size should be this factor times the expected number of distinct values (NDV)"),
+    TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50,
+        "Only perform semijoin optimization if it will reduce the scan results by at least this amount"),
+    TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION("hive.tez.bigtable.minsize.semijoin.reduction", 1000000L,
+        "Big table for runtime filtering should be at least this size"),
     TEZ_SMB_NUMBER_WAVES(
         "hive.tez.smb.number.waves",
         (float) 0.5,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java
index 2139eae..5bc002d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java
@@ -62,6 +62,8 @@ public VectorExpression inputExpression() {
   }
   private long expectedEntries = -1;
+  private long minEntries = -1;
+  private float factor = 1;
   private ValueProcessor valueProcessor;
   transient private int bitSetSize = -1;
   transient private BytesWritable bw = new BytesWritable();
@@ -75,8 +77,9 @@ public VectorExpression inputExpression() {
     BloomFilter bf;
-    public Aggregation(long expectedEntries) {
-      bf = new BloomFilter(expectedEntries);
+    public Aggregation(long expectedEntries, long minEntries, float factor) {
+      long entries = (long)(expectedEntries * factor);
+      bf = new BloomFilter(entries > minEntries ?
entries : minEntries); } @Override @@ -132,7 +135,7 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException { if (expectedEntries < 0) { throw new IllegalStateException("expectedEntries not initialized"); } - return new Aggregation(expectedEntries); + return new Aggregation(expectedEntries, minEntries, factor); } @Override @@ -408,6 +411,8 @@ public void init(AggregationDesc desc) throws HiveException { GenericUDAFBloomFilterEvaluator udafBloomFilter = (GenericUDAFBloomFilterEvaluator) desc.getGenericUDAFEvaluator(); expectedEntries = udafBloomFilter.getExpectedEntries(); + minEntries = udafBloomFilter.getMinEntries(); + factor = udafBloomFilter.getFactor(); } public VectorExpression getInputExpression() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java index d2446d5..9e196b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java @@ -47,6 +47,8 @@ public VectorExpression inputExpression() { } private long expectedEntries = -1; + private long minEntries = -1; + private float factor = 1; transient private int aggBufferSize = -1; transient private BytesWritable bw = new BytesWritable(); @@ -58,9 +60,10 @@ public VectorExpression inputExpression() { byte[] bfBytes; - public Aggregation(long expectedEntries) { + public Aggregation(long expectedEntries, long minEntries, float factor) { try { - BloomFilter bf = new BloomFilter(expectedEntries); + long entries = (long) (expectedEntries * factor); + BloomFilter bf = new BloomFilter(entries > minEntries ? 
entries : minEntries); ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); BloomFilter.serialize(bytesOut, bf); bfBytes = bytesOut.toByteArray(); @@ -95,7 +98,7 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException { if (expectedEntries < 0) { throw new IllegalStateException("expectedEntries not initialized"); } - return new Aggregation(expectedEntries); + return new Aggregation(expectedEntries, minEntries, factor); } @Override @@ -358,6 +361,8 @@ public void init(AggregationDesc desc) throws HiveException { GenericUDAFBloomFilterEvaluator udafBloomFilter = (GenericUDAFBloomFilterEvaluator) desc.getGenericUDAFEvaluator(); expectedEntries = udafBloomFilter.getExpectedEntries(); + minEntries = udafBloomFilter.getMinEntries(); + factor = udafBloomFilter.getFactor(); } void processValue(Aggregation myagg, ColumnVector columnVector, int i) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index c513ee5..727f7bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -507,6 +507,8 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator(); bloomFilterEval.setSourceOperator(selectOp); bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR)); bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval); aggs.add(min); aggs.add(max); @@ -603,6 +605,8 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator(); bloomFilterEval.setSourceOperator(selectOp); bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES)); + bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR)); bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval); aggsFinal.add(min); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 47b229f..8107cea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -102,12 +102,6 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // after the stats phase we might have some cyclic dependencies that we need - // to take care of. 
-    runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
-    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
-
-    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
     // Remove semijoin optimization if it creates a cycle with mapside joins
     removeSemiJoinCyclesDueToMapsideJoins(procCtx);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join");
@@ -122,6 +116,12 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs,
     removeSemiJoinIfNoStats(procCtx);
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+    // after the stats phase we might have some cyclic dependencies that we need
+    // to take care of.
+    runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+
     // need a new run of the constant folding because we might have created lots
     // of "and true and true" conditions.
     // Rather than run the full constant folding just need to shortcut AND/OR expressions
@@ -781,6 +781,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
           break;
         }
       }
+
+      // Check if the big table is big enough that runtime filtering is
+      // worth it.
+      if (ts.getStatistics() != null) {
+        long numRows = ts.getStatistics().getNumRows();
+        if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
+          removeSemiJoin = true;
+        }
+      }
+
      if (removeSemiJoin) {
         // The stats are not annotated, remove the semijoin operator
         GenTezUtils.removeBranch(rs);
@@ -810,4 +820,111 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
     GraphWalker ogw = new PreOrderOnceWalker(disp);
     ogw.startWalking(topNodes, null);
   }
+
+  private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
+                                             ParseContext parseContext,
+                                             Map<ReduceSinkOperator, TableScanOperator> semijoins) {
+
+    boolean parallelEdges = false;
+    for (Operator<?> op : mapjoin.getParentOperators()) {
+      if (!(op instanceof ReduceSinkOperator)) {
+        continue;
+      }
+
+      op = op.getParentOperators().get(0);
+
+      // Follow the ReduceSink operator upstream on the small table side.
+      while (!(op instanceof ReduceSinkOperator) &&
+             !(op instanceof TableScanOperator) &&
+             !(op.getChildren() != null && op.getChildren().size() > 1)) {
+        op = op.getParentOperators().get(0);
+      }
+
+      // Bail out if an RS or TS is encountered.
+      if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) {
+        continue;
+      }
+
+      // A branch is hit.
+      for (Node nd : op.getChildren()) {
+        if (nd instanceof SelectOperator) {
+          Operator<?> child = (Operator<?>) nd;
+
+          while (child.getChildOperators().size() > 0) {
+            child = child.getChildOperators().get(0);
+          }
+
+          // If not a ReduceSink Op, skip
+          if (!(child instanceof ReduceSinkOperator)) {
+            continue;
+          }
+
+          ReduceSinkOperator rs = (ReduceSinkOperator) child;
+          TableScanOperator ts = parseContext.getRsOpToTsOpMap().get(rs);
+          if (ts == null || ts != bigTableTS) {
+            // Skip; no semijoin, or not the one we are looking for.
+            continue;
+          }
+
+          // Add the semijoin branch to the map
+          semijoins.put(rs, ts);
+          parallelEdges = true;
+        }
+      }
+    }
+    return parallelEdges;
+  }
+
+  /*
+   * The algorithm looks at all the mapjoins in the operator pipeline until
+   * it hits an RS Op, and for each mapjoin examines whether it has a
+   * parallel semijoin edge.
+   */
+  private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx)
+      throws SemanticException {
+    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
+        !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN)) {
+      // Not needed without semijoin reduction
+      return;
+    }
+
+    // Get all the TS ops.
+    List<Operator<?>> topOps = new ArrayList<>();
+    topOps.addAll(procCtx.parseContext.getTopOps().values());
+
+    Map<ReduceSinkOperator, TableScanOperator> semijoins = new HashMap<>();
+    for (Operator<?> parent : topOps) {
+      // A TS can have multiple branches due to DPP or semijoin opt.
+      // Use DFS to traverse all the branches until an RS is hit.
+      Deque<Operator<?>> deque = new LinkedList<>();
+      deque.add(parent);
+      while (!deque.isEmpty()) {
+        Operator<?> op = deque.poll();
+        if (op instanceof ReduceSinkOperator) {
+          // Done with this branch
+          continue;
+        }
+
+        if (op instanceof MapJoinOperator) {
+          // A candidate.
+          if (!findParallelSemiJoinBranch(op, (TableScanOperator) parent,
+              procCtx.parseContext, semijoins)) {
+            // No parallel edge was found for the given mapjoin op;
+            // no need to go down further, skip this TS operator pipeline.
+            break;
+          }
+        }
+        deque.addAll(op.getChildOperators());
+      }
+    }
+
+    if (semijoins.size() > 0) {
+      for (ReduceSinkOperator rs : semijoins.keySet()) {
+        GenTezUtils.removeBranch(rs);
+        GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs,
+            semijoins.get(rs));
+      }
+    }
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
index 788aace..25a7d29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
@@ -73,6 +73,8 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticE
   // Source operator to get the number of entries
   private SelectOperator sourceOperator;
   private long maxEntries = 0;
+  private long minEntries = 0;
+  private float factor = 1;
   // ObjectInspector for input data.
   private PrimitiveObjectInspector inputOI;
@@ -105,11 +107,13 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc
   static class BloomFilterBuf extends AbstractAggregationBuffer {
     BloomFilter bloomFilter;
-    public BloomFilterBuf(long expectedEntries, long maxEntries) {
+    public BloomFilterBuf(long expectedEntries, long minEntries,
+        long maxEntries, float factor) {
       if (expectedEntries > maxEntries) {
        bloomFilter = new BloomFilter(1);
       } else {
-        bloomFilter = new BloomFilter(expectedEntries);
+        long entries = (long)(expectedEntries * factor);
+        bloomFilter = new BloomFilter(entries > minEntries ?
entries : minEntries); } } @@ -131,7 +135,7 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException { throw new IllegalStateException("BloomFilter expectedEntries not initialized"); } - BloomFilterBuf buf = new BloomFilterBuf(expectedEntries, maxEntries); + BloomFilterBuf buf = new BloomFilterBuf(expectedEntries, minEntries, maxEntries, factor); reset(buf); return buf; } @@ -293,6 +297,21 @@ public void setMaxEntries(long maxEntries) { this.maxEntries = maxEntries; } + public void setMinEntries(long minEntries) { + this.minEntries = minEntries; + } + + public long getMinEntries() { + return minEntries; + } + + public void setFactor(float factor) { + this.factor = factor; + } + + public float getFactor() { + return factor; + } @Override public String getExprString() { return "expectedEntries=" + getExpectedEntries(); diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q index d28da6e..9145a36 100644 --- a/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/dynamic_partition_pruning.q @@ -6,7 +6,8 @@ set hive.ppd.remove.duplicatefilters=true; set hive.tez.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; - +set hive.tez.min.bloom.filter.entries=1; +set hive.tez.bigtable.minsize.semijoin.reduction=1; select distinct ds from srcpart; select distinct hr from srcpart; diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index 5482cdb..f04a923 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -8,6 +8,8 @@ set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.stats.autogather=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; -- Create Tables create table alltypesorc_int ( cint int, cstring string ) stored as ORC; @@ -67,7 +69,7 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); ---set hive.tez.dynamic.semijoin.reduction=false; +set hive.tez.dynamic.semijoin.reduction=false; -- With Mapjoins. 
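The three new knobs combine into one sizing rule: scale the expected NDV by hive.tez.bloom.filter.factor, clamp the result up to hive.tez.min.bloom.filter.entries, and fall back to a size-1 (effectively disabled) filter when the NDV exceeds hive.tez.max.bloom.filter.entries. Below is a minimal standalone sketch of that rule, mirroring BloomFilterBuf in the GenericUDAFBloomFilter.java hunk above; the class and method names here are illustrative, not part of the patch.

// Standalone sketch of the sizing rule; names are illustrative, not Hive API.
public class BloomFilterSizing {

  // Mirrors BloomFilterBuf: NDVs over the max cap effectively disable the
  // filter (size 1); otherwise scale the expected NDV by the factor and
  // clamp the result up to the configured minimum.
  static long chooseEntries(long expectedEntries, long minEntries,
      long maxEntries, float factor) {
    if (expectedEntries > maxEntries) {
      return 1;
    }
    long entries = (long) (expectedEntries * factor);
    return entries > minEntries ? entries : minEntries;
  }

  public static void main(String[] args) {
    long min = 1000000L;       // hive.tez.min.bloom.filter.entries default
    long max = 100000000L;     // hive.tez.max.bloom.filter.entries default
    float factor = 2.0f;       // hive.tez.bloom.filter.factor default
    System.out.println(chooseEntries(214L, min, max, factor));        // 1000000 (clamped up to min)
    System.out.println(chooseEntries(5000000L, min, max, factor));    // 10000000 (NDV * factor)
    System.out.println(chooseEntries(200000000L, min, max, factor));  // 1 (over max cap)
  }
}

Note that the vectorized Aggregation constructors in the VectorUDAFBloomFilter/VectorUDAFBloomFilterMerge hunks above apply the same min/factor rule; the max-entries cap is not part of those constructors in this patch.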
set hive.auto.convert.join=true; @@ -79,6 +81,15 @@ select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcp set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +set hive.tez.dynamic.semijoin.reduction=false; + +-- multiple sources, different keys +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring); +--set hive.tez.dynamic.semijoin.reduction=false; -- With unions explain select * from alltypesorc_int join diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q index 2306395..88386a6 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q @@ -7,6 +7,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; CREATE TABLE `table_1`( `bigint_col_7` bigint, diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q index 01650f8..d5fe136 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q @@ -12,6 +12,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; -- Try with merge statements create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); diff --git a/ql/src/test/queries/clientpositive/mergejoin.q b/ql/src/test/queries/clientpositive/mergejoin.q index eecd105..381f253 100644 --- a/ql/src/test/queries/clientpositive/mergejoin.q +++ b/ql/src/test/queries/clientpositive/mergejoin.q @@ -9,6 +9,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.vectorized.execution.enabled=true; +set hive.tez.min.bloom.filter.entries=1; +set hive.tez.bigtable.minsize.semijoin.reduction=1; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index d32cb5c..7ce2e3c 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ 
b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -1568,6 +1568,117 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: 
default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) @@ -1692,9 +1803,9 @@ POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### 8224 -PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1705,45 +1816,235 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan alias: 
srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: 
default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +0 +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 3 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator 
Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=214) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: srcpart_small @@ -1795,6 +2096,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 @@ -1810,8 +2123,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_date@ds=2008-04-08 PREHOOK: Input: default@srcpart_date@ds=2008-04-09 @@ -1819,8 +2133,9 @@ 
PREHOOK: Input: default@srcpart_small PREHOOK: Input: default@srcpart_small@ds=2008-04-08 PREHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 @@ -1828,7 +2143,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +0 PREHOOK: query: explain select * from alltypesorc_int join (select srcpart_date.key as key from srcpart_date union all diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 0182a46..bb0abdf 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -3398,11 +3398,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3432,7 +3430,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: true vectorized: true - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_date @@ -3460,14 +3458,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 7 + Map 6 Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -3504,19 +3502,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3558,25 +3543,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index e62d913..7153f06 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -41,17 +41,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean) + filterExpr: key_int is not null (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -60,8 +58,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1) -> boolean, FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean) -> boolean - predicate: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean) + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: key_int is not null (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) @@ -123,33 +121,6 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -157,7 +128,7 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -214,35 +185,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0, 1, 2] - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -278,17 +220,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - filterExpr: (key_str is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean) + filterExpr: key_str is not null (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -297,8 +237,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0, left NULL, right NULL) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean) -> boolean - predicate: (key_str is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean) + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: key_str is not null (type: boolean) Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_str (type: string) @@ -360,33 +300,6 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57) - Group By Vectorization: - aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary - className: VectorGroupByOperator - vectorOutput: true - native: false - projectedOutputColumns: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -394,7 +307,7 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -451,35 +364,6 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -515,17 +399,15 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (key_str is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean)
+                  filterExpr: key_str is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
@@ -534,8 +416,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0, left NULL, right NULL) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean) -> boolean
-                    predicate: (key_str is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean)
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
+                    predicate: key_str is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key_str (type: string)
@@ -597,33 +479,6 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumns: [0]
-                        Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary
-                              className: VectorGroupByOperator
-                              vectorOutput: true
-                              native: false
-                              projectedOutputColumns: [0, 1, 2]
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkObjectHashOperator
-                                native: true
-                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -631,7 +486,7 @@ STAGE PLANS:
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
@@ -688,35 +543,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -752,18 +578,15 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter)))) (type: boolean)
+                  filterExpr: key_int is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
@@ -772,8 +595,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1) -> boolean, FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean) -> boolean
-                    predicate: (key_int is not null and (key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter)))) (type: boolean)
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
+                    predicate: key_int is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key_int (type: int)
@@ -835,33 +658,6 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumns: [1]
-                        Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary
-                              className: VectorGroupByOperator
-                              vectorOutput: true
-                              native: false
-                              projectedOutputColumns: [0, 1, 2]
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkObjectHashOperator
-                                native: true
-                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -869,10 +665,10 @@ STAGE PLANS:
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-        Map 6
+        Map 5
             Map Operator Tree:
                 TableScan
                   alias: c
@@ -905,33 +701,6 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumns: [1]
-                        Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary
-                              className: VectorGroupByOperator
-                              vectorOutput: true
-                              native: false
-                              projectedOutputColumns: [0, 1, 2]
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkObjectHashOperator
-                                native: true
-                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -939,7 +708,7 @@ STAGE PLANS:
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
@@ -998,64 +767,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
-        Reducer 7
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -1091,18 +802,15 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (key_str is not null and key_int is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
+                  filterExpr: (key_str is not null and key_int is not null) (type: boolean)
                   Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
@@ -1111,8 +819,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean, FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0, left NULL, right NULL) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean) -> boolean
-                    predicate: (key_str is not null and key_int is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
+                    predicate: (key_str is not null and key_int is not null) (type: boolean)
                     Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key_str (type: string), key_int (type: int)
@@ -1174,60 +882,6 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumns: [0]
-                        Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary
-                              className: VectorGroupByOperator
-                              vectorOutput: true
-                              native: false
-                              projectedOutputColumns: [0, 1, 2]
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkObjectHashOperator
-                                native: true
-                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-                      Select Operator
-                        expressions: _col1 (type: int)
-                        outputColumnNames: _col0
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumns: [1]
-                        Statistics: Num rows: 57 Data size: 10146 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=57)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary
-                              className: VectorGroupByOperator
-                              vectorOutput: true
-                              native: false
-                              projectedOutputColumns: [0, 1, 2]
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkObjectHashOperator
-                                native: true
-                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1235,7 +889,7 @@ STAGE PLANS:
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
@@ -1292,64 +946,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-        Reducer 6
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=57)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -1385,17 +981,15 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
+                  filterExpr: key_int is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
@@ -1404,8 +998,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1) -> boolean, FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1, left 0, right 0) -> boolean, VectorInBloomFilterColDynamicValue -> boolean) -> boolean) -> boolean
-                    predicate: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
+                        predicateExpression: SelectColumnIsNotNull(col 1) -> boolean
+                    predicate: key_int is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key_int (type: int)
@@ -1467,33 +1061,6 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumns: [1]
-                        Statistics: Num rows: 29 Data size: 5162 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=29)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary
-                              className: VectorGroupByOperator
-                              vectorOutput: true
-                              native: false
-                              projectedOutputColumns: [0, 1, 2]
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkObjectHashOperator
-                                native: true
-                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
            Map Vectorization:
@@ -1501,7 +1068,7 @@ STAGE PLANS:
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
@@ -1558,35 +1125,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                groupByVectorOutput: true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=29)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary
-                    className: VectorGroupByOperator
-                    vectorOutput: true
-                    native: false
-                    projectedOutputColumns: [0, 1, 2]
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkObjectHashOperator
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -1636,20 +1174,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
+                  filterExpr: key_int is not null (type: boolean)
                   Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
+                    predicate: key_int is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key_int (type: int)
@@ -1680,19 +1216,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=47)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -1729,18 +1252,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=47)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index d9fd706..cf3a96b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -103,20 +103,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (partkey_bigint is not null and (partkey_bigint BETWEEN DynamicValue(RS_7_b_partkey_bigint_min) AND DynamicValue(RS_7_b_partkey_bigint_max) and in_bloom_filter(partkey_bigint, DynamicValue(RS_7_b_partkey_bigint_bloom_filter)))) (type: boolean)
+                  filterExpr: partkey_bigint is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (partkey_bigint is not null and (partkey_bigint BETWEEN DynamicValue(RS_7_b_partkey_bigint_min) AND DynamicValue(RS_7_b_partkey_bigint_max) and in_bloom_filter(partkey_bigint, DynamicValue(RS_7_b_partkey_bigint_bloom_filter)))) (type: boolean)
+                    predicate: partkey_bigint is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: partkey_bigint (type: bigint)
@@ -147,19 +145,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: bigint)
                         Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: bigint)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=18)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -196,18 +181,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=18)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -239,20 +212,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (partkey_decimal is not null and (partkey_decimal BETWEEN DynamicValue(RS_7_b_partkey_decimal_min) AND DynamicValue(RS_7_b_partkey_decimal_max) and in_bloom_filter(partkey_decimal, DynamicValue(RS_7_b_partkey_decimal_bloom_filter)))) (type: boolean)
+                  filterExpr: partkey_decimal is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (partkey_decimal is not null and (partkey_decimal BETWEEN DynamicValue(RS_7_b_partkey_decimal_min) AND DynamicValue(RS_7_b_partkey_decimal_max) and in_bloom_filter(partkey_decimal, DynamicValue(RS_7_b_partkey_decimal_bloom_filter)))) (type: boolean)
+                    predicate: partkey_decimal is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: partkey_decimal (type: decimal(10,1))
@@ -283,19 +254,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: decimal(10,1))
                         Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: decimal(10,1))
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=16)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: decimal(10,1)), _col1 (type: decimal(10,1)), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -332,18 +290,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=16)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: decimal(10,1)), _col1 (type: decimal(10,1)), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -375,20 +321,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (partkey_double is not null and (partkey_double BETWEEN DynamicValue(RS_7_b_partkey_double_min) AND DynamicValue(RS_7_b_partkey_double_max) and in_bloom_filter(partkey_double, DynamicValue(RS_7_b_partkey_double_bloom_filter)))) (type: boolean)
+                  filterExpr: partkey_double is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (partkey_double is not null and (partkey_double BETWEEN DynamicValue(RS_7_b_partkey_double_min) AND DynamicValue(RS_7_b_partkey_double_max) and in_bloom_filter(partkey_double, DynamicValue(RS_7_b_partkey_double_bloom_filter)))) (type: boolean)
+                    predicate: partkey_double is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: partkey_double (type: double)
@@ -419,19 +363,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: double)
                         Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: double)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -468,18 +399,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -511,20 +430,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (shipdate_date is not null and (shipdate_date BETWEEN DynamicValue(RS_7_b_shipdate_date_min) AND DynamicValue(RS_7_b_shipdate_date_max) and in_bloom_filter(shipdate_date, DynamicValue(RS_7_b_shipdate_date_bloom_filter)))) (type: boolean)
+                  filterExpr: shipdate_date is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (shipdate_date is not null and (shipdate_date BETWEEN DynamicValue(RS_7_b_shipdate_date_min) AND DynamicValue(RS_7_b_shipdate_date_max) and in_bloom_filter(shipdate_date, DynamicValue(RS_7_b_shipdate_date_bloom_filter)))) (type: boolean)
+                    predicate: shipdate_date is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: shipdate_date (type: date)
@@ -555,19 +472,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: date)
                         Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: date)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -604,18 +508,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -647,20 +539,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (shipdate_ts is not null and (shipdate_ts BETWEEN DynamicValue(RS_7_b_shipdate_ts_min) AND DynamicValue(RS_7_b_shipdate_ts_max) and in_bloom_filter(shipdate_ts, DynamicValue(RS_7_b_shipdate_ts_bloom_filter)))) (type: boolean)
+                  filterExpr: shipdate_ts is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (shipdate_ts is not null and (shipdate_ts BETWEEN DynamicValue(RS_7_b_shipdate_ts_min) AND DynamicValue(RS_7_b_shipdate_ts_max) and in_bloom_filter(shipdate_ts, DynamicValue(RS_7_b_shipdate_ts_bloom_filter)))) (type: boolean)
+                    predicate: shipdate_ts is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: shipdate_ts (type: timestamp)
@@ -691,19 +581,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: timestamp)
                         Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: timestamp)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -740,18 +617,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -783,20 +648,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (shipdate_string is not null and (shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, DynamicValue(RS_7_b_shipdate_string_bloom_filter)))) (type: boolean)
+                  filterExpr: shipdate_string is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (shipdate_string is not null and (shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, DynamicValue(RS_7_b_shipdate_string_bloom_filter)))) (type: boolean)
+                    predicate: shipdate_string is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: shipdate_string (type: string)
@@ -827,19 +690,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: string)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -876,18 +726,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -919,20 +757,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (shipdate_char is not null and (shipdate_char BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, DynamicValue(RS_7_b_shipdate_char_bloom_filter)))) (type: boolean)
+                  filterExpr: shipdate_char is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (shipdate_char is not null and (shipdate_char BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, DynamicValue(RS_7_b_shipdate_char_bloom_filter)))) (type: boolean)
+                    predicate: shipdate_char is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: shipdate_char (type: char(10))
@@ -963,19 +799,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: char(10))
                         Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: char(10))
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -1012,18 +835,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
@@ -1055,20 +866,18 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 5 (BROADCAST_EDGE)
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  filterExpr: (shipdate_varchar is not null and (shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, DynamicValue(RS_7_b_shipdate_varchar_bloom_filter)))) (type: boolean)
+                  filterExpr: shipdate_varchar is not null (type: boolean)
                   Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (shipdate_varchar is not null and (shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, DynamicValue(RS_7_b_shipdate_varchar_bloom_filter)))) (type: boolean)
+                    predicate: shipdate_varchar is not null (type: boolean)
                     Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: shipdate_varchar (type: varchar(10))
@@ -1099,19 +908,6 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: varchar(10))
                         Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
-                      Select Operator
-                        expressions: _col0 (type: varchar(10))
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                            value expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2
@@ -1148,18 +944,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
-                  value expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: binary)
   Stage: Stage-0
     Fetch Operator
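Note on the golden-file changes above: every hunk makes the same edit. For probe-side scans of only 100-500 rows, the planner no longer emits the semijoin-reduction branch (the min/max/bloom_filter builder, its Reducer 5 vertex, and the `x BETWEEN DynamicValue(...) AND DynamicValue(...) and in_bloom_filter(x, DynamicValue(...))` probe predicate), leaving only the plain null check. The sketch below is one plausible reading of the cost gate that produces this behavior; the class, method, and parameter names are hypothetical illustrations, not Hive's actual planner API.

// Hypothetical sketch of a "is the runtime filter worth it?" gate.
public class RuntimeFilterGate {

    /**
     * bigTableRows       - estimated rows scanned on the probe (big) side
     * smallSideRows      - estimated build-side rows (bloom filter entries)
     * minBigTableRows    - smallest probe-side scan worth filtering at all
     * reductionThreshold - minimum fraction of probe rows the filter must remove
     */
    static boolean shouldBuildRuntimeFilter(long bigTableRows, long smallSideRows,
                                            long minBigTableRows, double reductionThreshold) {
        if (bigTableRows < minBigTableRows) {
            return false; // scan is too small; the filter's overhead would dominate
        }
        double expectedReduction = 1.0 - (double) smallSideRows / bigTableRows;
        return expectedReduction >= reductionThreshold;
    }

    public static void main(String[] args) {
        // A 500-row scan, as in the plans above, fails a large minimum-size
        // cutoff, so the semijoin branch is dropped from the plan.
        System.out.println(shouldBuildRuntimeFilter(500, 57, 1_000_000, 0.5));        // false
        System.out.println(shouldBuildRuntimeFilter(50_000_000, 57, 1_000_000, 0.5)); // true
    }
}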
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index 32609eb..087f916 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -824,101 +824,34 @@
 POSTHOOK: query: explain analyze select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 3 <- Map 1 (CUSTOM_EDGE), Reducer 2 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1/3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1/3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean)
-                  Statistics: Num rows: 500/500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean)
-                    Statistics: Num rows: 500/244 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500/244 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0, _col1, _col3
-                        input vertices:
-                          0 Map 1
-                        Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                        HybridGraceHashJoin: true
-                        Select Operator
-                          expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                          File Output Operator
-                            compressed: false
-                            Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                            table:
-                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 2
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 2 <- Map 1 (CUSTOM_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Map 2
+      File Output Operator [FS_10]
+        Select Operator [SEL_9] (rows=550/480 width=18)
+          Output:["_col0","_col1","_col2"]
+          Map Join Operator [MAPJOIN_25] (rows=550/480 width=18)
+            BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
+          <-Map 1 [CUSTOM_EDGE]
+            MULTICAST [RS_6]
+              PartitionCols:_col0
+              Select Operator [SEL_2] (rows=242/242 width=18)
+                Output:["_col0","_col1"]
+                Filter Operator [FIL_13] (rows=242/242 width=18)
+                  predicate:key is not null
+                  TableScan [TS_0] (rows=242/242 width=18)
+                    default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+          <-Select Operator [SEL_5] (rows=500/500 width=18)
+              Output:["_col0","_col1"]
+              Filter Operator [FIL_14] (rows=500/500 width=18)
+                predicate:key is not null
+                TableScan [TS_3] (rows=500/500 width=18)
+                  default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index da52b0a..3af4dbc 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -652,102 +652,34 @@
 POSTHOOK: query: explain select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+Plan optimized by CBO.
-STAGE PLANS:
-  Stage: Stage-1
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Map 3 <- Map 1 (CUSTOM_EDGE), Reducer 2 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
-                      Select Operator
-                        expressions: _col0 (type: int)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order:
-                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0, _col1, _col3
-                        input vertices:
-                          0 Map 1
-                        Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                        HybridGraceHashJoin: true
-                        Select Operator
-                          expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                          File Output Operator
-                            compressed: false
-                            Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                            table:
-                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 2
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
+Vertex dependency in root stage
+Map 2 <- Map 1 (CUSTOM_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Map 2
+      File Output Operator [FS_10]
+        Select Operator [SEL_9] (rows=550 width=18)
+          Output:["_col0","_col1","_col2"]
+          Map Join Operator [MAPJOIN_25] (rows=550 width=18)
+            BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
+          <-Map 1 [CUSTOM_EDGE]
+            MULTICAST [RS_6]
+              PartitionCols:_col0
+              Select Operator [SEL_2] (rows=242 width=18)
+                Output:["_col0","_col1"]
+                Filter Operator [FIL_13] (rows=242 width=18)
+                  predicate:key is not null
+                  TableScan [TS_0] (rows=242 width=18)
+                    default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+          <-Select Operator [SEL_5] (rows=500 width=18)
+              Output:["_col0","_col1"]
+              Filter Operator [FIL_14] (rows=500 width=18)
+                predicate:key is not null
+                TableScan [TS_3] (rows=500 width=18)
+                  default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
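
For reference, the composite probe predicate these golden files previously asserted — `key BETWEEN min AND max and in_bloom_filter(key, bloom)` — pairs a cheap range check with a probabilistic membership test built from the small side of the join. The toy sketch below illustrates those semantics only; ToyBloomFilter is a stand-in written for this note, not Hive's BloomFilter class.

import java.util.BitSet;

public class SemijoinProbeSketch {

    // Toy two-hash bloom filter: may return false positives, never false negatives.
    static final class ToyBloomFilter {
        private final BitSet bits;
        private final int size;

        ToyBloomFilter(int size) {
            this.size = size;
            this.bits = new BitSet(size);
        }

        void add(long key) {
            bits.set(h1(key));
            bits.set(h2(key));
        }

        boolean mightContain(long key) {
            return bits.get(h1(key)) && bits.get(h2(key));
        }

        private int h1(long k) { return Math.floorMod(Long.hashCode(k), size); }
        private int h2(long k) { return Math.floorMod(Long.hashCode(k * 0x9E3779B97F4A7C15L), size); }
    }

    public static void main(String[] args) {
        // Build side: collect min, max, and a bloom filter over the join keys.
        long[] buildKeys = {3, 17, 42};
        ToyBloomFilter bloom = new ToyBloomFilter(1024);
        long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
        for (long k : buildKeys) {
            bloom.add(k);
            min = Math.min(min, k);
            max = Math.max(max, k);
        }
        // Probe side: rows failing the range check or the bloom test are
        // skipped before the shuffle join ever sees them.
        for (long key : new long[]{1, 17, 99}) {
            boolean pass = key >= min && key <= max && bloom.mightContain(key);
            System.out.println(key + " -> " + pass);
        }
    }
}

The range check prunes most misses for nearly free; the bloom filter then catches in-range keys that never occur on the build side, at the cost of an occasional false positive that the join itself filters out.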