diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java index 721a9b9998..6d25a208b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java @@ -37,6 +37,7 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Stack; @@ -97,13 +98,25 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, // Insert a new top n key operator between the group by operator and its parent TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), groupByKeyColumns); - Operator<? extends OperatorDesc> newOperator = OperatorFactory.getAndMakeChild( - groupByOperator.getCompilationOpContext(), (OperatorDesc) topNKeyDesc, - new RowSchema(groupByOperator.getSchema()), groupByOperator.getParentOperators()); - newOperator.getChildOperators().add(groupByOperator); - groupByOperator.getParentOperators().add(newOperator); - parentOperator.removeChild(groupByOperator); - + copyDown(groupByOperator, topNKeyDesc); return null; } + + /** + * Creates an operator from the given descriptor and splices it in between the given child + * operator and all of its parents. Returns the newly created operator. + */ + static TopNKeyOperator copyDown(Operator<? extends OperatorDesc> child, OperatorDesc operatorDesc) { + final List<Operator<? extends OperatorDesc>> parents = child.getParentOperators(); + + final Operator<? extends OperatorDesc> newOperator = + OperatorFactory.getAndMakeChild( + child.getCompilationOpContext(), operatorDesc, + new RowSchema(parents.get(0).getSchema()), child.getParentOperators()); + newOperator.setParentOperators(new ArrayList<>(parents)); + newOperator.setChildOperators(new ArrayList<>(Collections.singletonList(child))); + + for (Operator<? extends OperatorDesc> parent : parents) { + parent.removeChild(child); + } + child.setParentOperators(new ArrayList<>(Collections.singletonList(newOperator))); + + return (TopNKeyOperator) newOperator; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java new file mode 100644 index 0000000000..8b7d7ed7b7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdownProcessor.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TopNKeyOperator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import static org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor.copyDown; + +/** + * Pushes a TopNKey operator down through the operator below it: Select, Forward, GroupBy, + * ReduceSink, outer joins and other TopNKey operators. + */ +public class TopNKeyPushdownProcessor implements NodeProcessor { + private static final Logger LOG = LoggerFactory.getLogger(TopNKeyPushdownProcessor.class); + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + pushdown((TopNKeyOperator) nd); + return null; + } + + private void pushdown(TopNKeyOperator topNKey) throws SemanticException { + + final Operator<? extends OperatorDesc> parent = + topNKey.getParentOperators().get(0); + + switch (parent.getType()) { + case SELECT: + pushdownThroughSelect(topNKey); + break; + + case FORWARD: + moveDown(topNKey); + pushdown(topNKey); + break; + + case GROUPBY: + pushdownThroughGroupBy(topNKey); + break; + + case REDUCESINK: + pushdownThroughReduceSink(topNKey); + break; + + case MERGEJOIN: + case JOIN: + { + final CommonJoinOperator<? extends JoinDesc> join = + (CommonJoinOperator<? extends JoinDesc>) parent; + final JoinCondDesc[] joinConds = join.getConf().getConds(); + final JoinCondDesc firstJoinCond = joinConds[0]; + for (JoinCondDesc joinCond : joinConds) { + if (!firstJoinCond.equals(joinCond)) { + return; + } + } + switch (firstJoinCond.getType()) { + case JoinDesc.FULL_OUTER_JOIN: + pushdownThroughFullOuterJoin(topNKey); + break; + + case JoinDesc.LEFT_OUTER_JOIN: + pushdownThroughLeftOuterJoin(topNKey); + break; + + case JoinDesc.RIGHT_OUTER_JOIN: + pushdownThroughRightOuterJoin(topNKey); + break; + } + } + break; + + case TOPNKEY: + if (hasSameTopNKeyDesc(parent, topNKey.getConf())) { + parent.removeChildAndAdoptItsChildren(topNKey); + } + break; + } + } + + private void pushdownThroughSelect(TopNKeyOperator topNKey) throws SemanticException { + + final SelectOperator select = (SelectOperator) topNKey.getParentOperators().get(0); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + + // Map columns; bail out unless every key can be rewritten in terms of the Select's input + final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(), + select.getColumnExprMap()); + if (mappedColumns.size() != topNKeyDesc.getKeyColumns().size()) { + return; + } + + // Move down + topNKeyDesc.setKeyColumns(mappedColumns); + moveDown(topNKey); + pushdown(topNKey); + } + + private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticException { + /* + * Push through GroupBy.
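 For example (illustrative): a TopNKey keyed on 'key' above a + * GROUP BY 'key' is re-created below the GroupBy and the copy above it is removed.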
 Not applicable when grouping sets are present. If the TopNKey key + * expressions are the same as the GroupBy key expressions, the TopNKey can be pushed below the + * GroupBy and removed from above it. If the TopNKey keys only share a common prefix with the + * GroupBy keys, the TopNKey could be pushed through the GroupBy using that prefix and kept + * above it. + */ + final GroupByOperator groupBy = (GroupByOperator) topNKey.getParentOperators().get(0); + final GroupByDesc groupByDesc = groupBy.getConf(); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + + // Check grouping sets + if (groupByDesc.isGroupingSetsPresent()) { + return; + } + + // Map columns + final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(), + groupByDesc.getColumnExprMap()); + // If TopNKey expression is same as GroupBy expression + if (!ExprNodeDescUtils.isSame(groupByDesc.getKeys(), mappedColumns)) { + return; + } + + // We can push it and remove it from above GroupBy. + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), + topNKeyDesc.getColumnSortOrder(), mappedColumns); + groupBy.removeChildAndAdoptItsChildren(topNKey); + pushdown(copyDown(groupBy, newTopNKeyDesc)); + } + + private void pushdownThroughReduceSink(TopNKeyOperator topNKey) throws SemanticException { + /* + * Push through ReduceSink. If the TopNKey key expressions match the ReduceSink key expressions + * and the sort orders are the same, the TopNKey can be pushed below the ReduceSink and removed + * from above it. (E.g. a TopNKey ordered ascending by 'key' can move below a ReduceSink that + * shuffles and sorts ascending on 'key'.) If the TopNKey keys share only a common prefix with + * the ReduceSink keys, with the same order, the TopNKey could be pushed through the ReduceSink + * using that prefix and kept above it. + */ + final ReduceSinkOperator reduceSink = (ReduceSinkOperator) topNKey.getParentOperators().get(0); + final ReduceSinkDesc reduceSinkDesc = reduceSink.getConf(); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + + // Check orders + if (!reduceSinkDesc.getOrder().equals(topNKeyDesc.getColumnSortOrder())) { + return; + } + + // Map columns + final List<ExprNodeDesc> mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(), + reduceSinkDesc.getColumnExprMap()); + // If TopNKey expression is same as ReduceSink expression + if (!ExprNodeDescUtils.isSame(reduceSinkDesc.getKeyCols(), mappedColumns)) { + return; + } + + // We can push it and remove it from above ReduceSink. + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), + topNKeyDesc.getColumnSortOrder(), mappedColumns); + reduceSink.removeChildAndAdoptItsChildren(topNKey); + pushdown(copyDown(reduceSink, newTopNKeyDesc)); + } + + private void pushdownThroughFullOuterJoin(TopNKeyOperator topNKey) throws SemanticException { + /* + * Push through FOJ. Copy the TopNKey down to one join input, selected by comparing the inputs' + * data sizes, keyed on that input's sort keys minus the join keys. Keep the original on top of + * the FOJ.
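 For example (illustrative): if the chosen input's ReduceSink sorts on (a, b) and the + * join key is a, a TopNKey on b is created below the join while the original TopNKey stays + * above it.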
+ */ + final CommonJoinOperator<? extends JoinDesc> join = + (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + final ReduceSinkOperator leftInput = (ReduceSinkOperator) join.getParentOperators().get(0); + final ReduceSinkOperator rightInput = (ReduceSinkOperator) join.getParentOperators().get(1); + + // Check null orders + if (!checkNullOrder(leftInput.getConf())) { + return; + } + if (!checkNullOrder(rightInput.getConf())) { + return; + } + + // Map columns + final ReduceSinkOperator joinInput; + final List<ExprNodeDesc> mappedColumns; + if (leftInput.getStatistics().getDataSize() > rightInput.getStatistics().getDataSize()) { + joinInput = rightInput; + mappedColumns = new ArrayList<>(joinInput.getConf().getKeyCols()); + for (JoinCondDesc cond : join.getConf().getConds()) { + mappedColumns.remove(cond.getRight()); + } + } else { + joinInput = leftInput; + mappedColumns = new ArrayList<>(joinInput.getConf().getKeyCols()); + for (JoinCondDesc cond : join.getConf().getConds()) { + mappedColumns.remove(cond.getLeft()); + } + } + if (mappedColumns.isEmpty()) { + return; + } + + // Copy down + final String mappedOrder = mapOrder(topNKeyDesc.getColumnSortOrder(), + joinInput.getConf().getKeyCols(), mappedColumns); + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), mappedOrder, + mappedColumns); + pushdown(copyDown(joinInput, newTopNKeyDesc)); + } + + private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws SemanticException { + pushdownThroughLeftOrRightOuterJoin(topNKey, 0); + } + + private void pushdownThroughRightOuterJoin(TopNKeyOperator topNKey) throws SemanticException { + pushdownThroughLeftOrRightOuterJoin(topNKey, 1); + } + + private void pushdownThroughLeftOrRightOuterJoin(TopNKeyOperator topNKey, int position) + throws SemanticException { + /* + * Push through LOJ or ROJ, using the join input at the given position (the left input for LOJ, + * the right input for ROJ). If the TopNKey key expressions refer entirely to columns of that + * input, rewrite the expressions, push the TopNKey below the join and remove it from above the + * join. If only a prefix of the TopNKey keys refers to that input, push a rewritten copy below + * the join and keep the original on top of it.
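 For example (illustrative): for a TopNKey on + * (t1.a, t1.b) above t1 LEFT OUTER JOIN t2 ON t1.a = t2.a, both keys map to the left input, so + * the TopNKey is re-created below the join and the copy above the join is removed.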
+ */ + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + final CommonJoinOperator<? extends JoinDesc> join = + (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0); + final List<Operator<? extends OperatorDesc>> joinInputs = join.getParentOperators(); + final ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) joinInputs.get(position); + final ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf(); + + // Check null order + if (!checkNullOrder(reduceSinkDesc)) { + return; + } + + // Map columns + final List<ExprNodeDesc> mappedColumns = mapColumns(mapColumns(topNKeyDesc.getKeyColumns(), + join.getColumnExprMap()), reduceSinkOperator.getColumnExprMap()); + if (mappedColumns.isEmpty()) { + return; + } + + // Copy down + final String mappedOrder = mapOrder(topNKeyDesc.getColumnSortOrder(), + reduceSinkDesc.getKeyCols(), mappedColumns); + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), mappedOrder, + mappedColumns); + pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc)); + + // If all columns are mapped, remove from top + if (topNKeyDesc.getKeyColumns().size() == mappedColumns.size()) { + join.removeChildAndAdoptItsChildren(topNKey); + } + } + + private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator, + TopNKeyDesc desc) { + + if (operator instanceof TopNKeyOperator) { + final TopNKeyOperator topNKey = (TopNKeyOperator) operator; + final TopNKeyDesc opDesc = topNKey.getConf(); + if (opDesc.isSame(desc)) { + return true; + } + } + return false; + } + + /** + * Derives the sort order string for the mapped columns; columns that are not among the parent's + * key columns default to ascending. + */ + private static String mapOrder(String order, List<ExprNodeDesc> parentCols, List<ExprNodeDesc> + mappedCols) { + + final StringBuilder builder = new StringBuilder(); + int index = 0; + for (ExprNodeDesc mappedCol : mappedCols) { + if (parentCols.contains(mappedCol)) { + builder.append(order.charAt(index++)); + } else { + builder.append("+"); + } + } + return builder.toString(); + } + + /** + * Maps each column through the given column expression map, dropping columns that have no + * mapping. Returns the columns unchanged if the map is null. + */ + private static List<ExprNodeDesc> mapColumns(List<ExprNodeDesc> columns, Map<String, ExprNodeDesc> + colExprMap) { + + if (colExprMap == null) { + return columns; + } + final List<ExprNodeDesc> mappedColumns = new ArrayList<>(); + for (ExprNodeDesc column : columns) { + final String columnName = column.getExprString(); + if (colExprMap.containsKey(columnName)) { + mappedColumns.add(colExprMap.get(columnName)); + } + } + return mappedColumns; + } + + /** Swaps the TopNKey operator with its single parent, moving the TopNKey one level down. */ + private static void moveDown(TopNKeyOperator topNKey) throws SemanticException { + + assert topNKey.getNumParent() == 1; + final Operator<? extends OperatorDesc> parent = topNKey.getParentOperators().get(0); + final List<Operator<? extends OperatorDesc>> grandParents = parent.getParentOperators(); + parent.removeChildAndAdoptItsChildren(topNKey); + for (Operator<? extends OperatorDesc> grandParent : grandParents) { + grandParent.replaceChild(parent, topNKey); + } + topNKey.setParentOperators(new ArrayList<>(grandParents)); + topNKey.setChildOperators(new ArrayList<>(Collections.singletonList(parent))); + parent.setParentOperators(new ArrayList<>(Collections.singletonList(topNKey))); + } + + /** Checks whether the given ReduceSink orders nulls first on every one of its keys. */ + private static boolean checkNullOrder(ReduceSinkDesc reduceSinkDesc) { + + final String order = reduceSinkDesc.getOrder(); + final String nullOrder = reduceSinkDesc.getNullOrder(); + if (nullOrder == null) { + for (int i = 0; i < order.length(); i++) { + if (order.charAt(i) != '+') { + return false; + } + } + } else { + for (int i = 0; i < nullOrder.length(); i++) { + if (nullOrder.charAt(i) != 'a') { + return false; + } + } + } + return true; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index fdc963506c..a1499c983a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TerminalOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.TopNKeyOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; @@ -82,6 +83,7 @@ import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer; import org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor; +import org.apache.hadoop.hive.ql.optimizer.TopNKeyPushdownProcessor; import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; @@ -421,6 +423,12 @@ private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx, opRules.put(new RuleRegExp("Convert Join to Map-join", JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin()); + if (procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) { + opRules.put( + new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"), + new TopNKeyPushdownProcessor()); + } + // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); diff --git ql/src/test/queries/clientpositive/topnkey.q ql/src/test/queries/clientpositive/topnkey.q index e02a41dd57..7a4f9f1a87 100644 --- ql/src/test/queries/clientpositive/topnkey.q +++ ql/src/test/queries/clientpositive/topnkey.q @@ -24,8 +24,29 @@ SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; -explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; +explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +set hive.optimize.topnkey=false; + +explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = 
src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; diff --git ql/src/test/queries/clientpositive/vector_topnkey.q ql/src/test/queries/clientpositive/vector_topnkey.q index e1b7d26afe..2a0cab80ae 100644 --- ql/src/test/queries/clientpositive/vector_topnkey.q +++ ql/src/test/queries/clientpositive/vector_topnkey.q @@ -14,17 +14,39 @@ set hive.tez.dynamic.partition.pruning=true; set hive.stats.fetch.column.stats=true; set hive.cbo.enable=true; -explain vectorization detail +explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; -explain vectorization detail +explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; -explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +set hive.optimize.topnkey=false; + +explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; diff --git ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 726d46b479..0ddde98fa3 100644 --- ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -64,15 +64,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '100') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string) @@ -197,15 +197,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string) @@ -304,15 +304,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: length(key) (type: int) - outputColumnNames: _col0 + Top N Key Operator + sort order: + + keys: length(key) (type: int) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: int) + top n: 10 + Select Operator + expressions: length(key) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: _col0 (type: int) @@ -391,15 +391,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: abs(length(key)) (type: int) - outputColumnNames: _col0 + Top N Key Operator + sort order: + + keys: abs(length(key)) (type: int) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: int) + top n: 10 + Select Operator + expressions: abs(length(key)) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: _col0 (type: int) @@ -479,15 +479,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: key (type: string) @@ -587,15 +587,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '101') (type: boolean) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator 
- expressions: value (type: string) - outputColumnNames: value + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: value (type: string) + top n: 10 + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: value (type: string) @@ -1193,15 +1193,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '102') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() bucketGroup: true @@ -1302,15 +1302,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '102') (type: boolean) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: value + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: value (type: string) + top n: 10 + Select Operator + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() keys: value (type: string) @@ -1511,15 +1511,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '103') (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 10 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: count() bucketGroup: true @@ -1620,15 +1620,15 @@ STAGE PLANS: alias: clustergroupby filterExpr: (ds = '103') (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: key (type: string), value (type: string) + top n: 10 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: 
count() keys: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index be1084b1f2..1f35918dde 100644 --- ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -1671,15 +1671,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col1 (type: string) + top n: 10 + Select Operator + expressions: key (type: string), value (type: string), UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 Group By Operator aggregations: min(_col2), max(_col3) keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index d99b3704c0..86d0acffa1 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1264,10 +1264,10 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Top N Key Operator [TNK_15] (rows=10 width=101) - keys:_col1, _col0,sort order:++,top n:1 - Select Operator [SEL_5] (rows=10 width=101) - Output:["_col0","_col1"] + Select Operator [SEL_5] (rows=10 width=101) + Output:["_col0","_col1"] + Top N Key Operator [TNK_15] (rows=10 width=101) + keys:(((UDFToFloat(_col1) + _col2) + 1.0) + 2.0), _col3,sort order:++,top n:1 Group By Operator [GBY_4] (rows=10 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 1 [SIMPLE_EDGE] llap diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 4fc1419acd..804b418123 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -195,15 +195,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) @@ -296,15 +296,15 @@ STAGE 
PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) @@ -401,15 +401,15 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double) - outputColumnNames: cdouble + Top N Key Operator + sort order: + + keys: cdouble (type: double) Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cdouble (type: double) + top n: 20 + Select Operator + expressions: cdouble (type: double) + outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator keys: cdouble (type: double) mode: hash @@ -932,15 +932,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 2 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 Group By Operator aggregations: count() keys: key (type: string) @@ -954,11 +954,15 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Top N Key Operator - sort order: + - keys: key (type: string) + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 3 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 3 Group By Operator aggregations: count() keys: key (type: string) diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index 1027bfe85a..7d75933f86 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -196,15 +196,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 
Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) @@ -311,15 +311,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) @@ -430,15 +430,15 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double) - outputColumnNames: cdouble + Top N Key Operator + sort order: + + keys: cdouble (type: double) Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cdouble (type: double) + top n: 20 + Select Operator + expressions: cdouble (type: double) + outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator keys: cdouble (type: double) mode: hash diff --git ql/src/test/results/clientpositive/llap/offset_limit.q.out ql/src/test/results/clientpositive/llap/offset_limit.q.out index 97d2ac25b4..5d33787201 100644 --- ql/src/test/results/clientpositive/llap/offset_limit.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit.q.out @@ -22,15 +22,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 20 + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 2e8d5f375f..03fcd10969 100644 --- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -197,15 +197,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - 
expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 30 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) @@ -299,15 +299,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: value (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 30 + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 Group By Operator aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) @@ -405,15 +405,15 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double) - outputColumnNames: cdouble + Top N Key Operator + sort order: + + keys: cdouble (type: double) Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cdouble (type: double) + top n: 30 + Select Operator + expressions: cdouble (type: double) + outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 Group By Operator keys: cdouble (type: double) mode: hash diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index a804e3c193..0ee86b5614 100644 --- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -229,31 +229,31 @@ STAGE PLANS: predicateExpression: FilterLongColGreaterLongScalar(col 4:int, val 500)(children: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int) predicate: (st1.f1 > 500) (type: boolean) Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: st1.f1 (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [4] - selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int + Top N Key Operator + sort order: + + keys: st1.f1 (type: int) Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 4:int + top n: 10 + Top N Key Vectorization: + className: 
VectorTopNKeyOperator + keyExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int + native: true + Select Operator + expressions: st1.f1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int + Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 4:int) -> bigint + aggregators: VectorUDAFSumLong(col 5:int) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 4:int + keyExpressions: col 5:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index baeb60e045..e2f07050bb 100644 --- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -205,31 +205,31 @@ STAGE PLANS: predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int) predicate: (l1[0] > 1000) (type: boolean) Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l1[1] (type: int), l1[0] (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6, 7] - selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int + Top N Key Operator + sort order: + + keys: l1[1] (type: int) Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 6:int + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int + native: true + Select Operator + expressions: l1[1] (type: int), l1[0] (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [7, 8] + selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int + Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 7:int) -> bigint + aggregators: VectorUDAFSumLong(col 8:int) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 6:int + keyExpressions: col 7:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -503,31 +503,31 @@ STAGE PLANS: predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int) predicate: (l1[0] > 1000) (type: boolean) Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l1[1] (type: int), l1[0] (type: int) - outputColumnNames: 
_col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6, 7] - selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int + Top N Key Operator + sort order: + + keys: l1[1] (type: int) Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 6:int + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int + native: true + Select Operator + expressions: l1[1] (type: int), l1[0] (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [7, 8] + selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int + Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 7:int) -> bigint + aggregators: VectorUDAFSumLong(col 8:int) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 6:int + keyExpressions: col 7:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -801,31 +801,31 @@ STAGE PLANS: predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array, col 0:int) -> 6:int) predicate: (l1[0] > 1000) (type: boolean) Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l1[1] (type: int), l1[0] (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6, 7] - selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int, ListIndexColScalar(col 2:array, col 0:int) -> 7:int + Top N Key Operator + sort order: + + keys: l1[1] (type: int) Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 6:int + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 6:int + native: true + Select Operator + expressions: l1[1] (type: int), l1[0] (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [7, 8] + selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int + Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 7:int) -> bigint + aggregators: VectorUDAFSumLong(col 8:int) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 6:int + keyExpressions: col 7:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] 
diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index b036cddbea..7075c05f1d 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -221,31 +221,31 @@ STAGE PLANS: predicateExpression: FilterStringColLikeStringScalar(col 8:string, pattern v100%)(children: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string) predicate: (stringmap['k1'] like 'v100%') (type: boolean) Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: stringmap['k1'] (type: string), intmap[123] (type: int), doublemap[123.123] (type: double) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [8, 9, 10] - selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 9:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 10:double + Top N Key Operator + sort order: + + keys: stringmap['k1'] (type: string) Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 8:string + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 8:string + native: true + Select Operator + expressions: stringmap['k1'] (type: string), intmap[123] (type: int), doublemap[123.123] (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [9, 10, 11] + selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 9:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 10:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 11:double + Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), sum(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 9:int) -> bigint, VectorUDAFSumDouble(col 10:double) -> double + aggregators: VectorUDAFSumLong(col 10:int) -> bigint, VectorUDAFSumDouble(col 11:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 8:string + keyExpressions: col 9:string native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index fec8093617..075f985f16 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -229,31 +229,31 @@ STAGE PLANS: predicateExpression: FilterLongColGreaterLongScalar(col 4:int, val 500)(children: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int) predicate: (st1.f1 > 500) (type: boolean) Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: st1.f1 (type: int) - 
outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [4] - selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int + Top N Key Operator + sort order: + + keys: st1.f1 (type: int) Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 4:int + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 4:int + native: true + Select Operator + expressions: st1.f1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int + Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 4:int) -> bigint + aggregators: VectorUDAFSumLong(col 5:int) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 4:int + keyExpressions: col 5:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] diff --git ql/src/test/results/clientpositive/llap/topnkey.q.out ql/src/test/results/clientpositive/llap/topnkey.q.out index c1d8874bb9..cc0f2da7c2 100644 --- ql/src/test/results/clientpositive/llap/topnkey.q.out +++ ql/src/test/results/clientpositive/llap/topnkey.q.out @@ -22,15 +22,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 5 + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) @@ -122,15 +122,15 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 Group By Operator keys: key (type: string) mode: hash @@ -194,16 +194,12 @@ POSTHOOK: Input: default@src 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY 
src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -213,8 +209,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -237,7 +234,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src2 @@ -269,23 +266,43 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -303,16 +320,538 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: 
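[Editorial note] From here on, topnkey.q replaces the plain ORDER BY ... LIMIT join query with a GROUP BY variant. That adds Reducer 4 to the DAG and lets the Top N Key Operator (top n: 5) sit between the Merge Join and the hash-mode Group By in Reducer 2, bounding rows before aggregation. The reworked test query, exactly as it appears in these hunks:

    -- The query now exercised by topnkey.q (plain EXPLAIN, no vectorization detail).
    -- Expect a Top N Key Operator (top n: 5) between the Merge Join and the
    -- hash Group By in Reducer 2.
    EXPLAIN
    SELECT src1.key, src2.value
    FROM src src1 JOIN src src2 ON (src1.key = src2.key)
    GROUP BY src1.key, src2.value
    ORDER BY src1.key
    LIMIT 5;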
query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: 
_col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key 
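[Editorial note] The outer-join tests mirror each other. In the LEFT OUTER JOIN plan above, the Top N Key Operator appears twice: once after the join (keys _col0, _col2) and once on the preserved left input (Map 1, keyed on src1.key), while the right input gets none. In the RIGHT OUTER JOIN plan below, it is the preserved right input (Map 5, src2) that receives the operator, keyed on both of src2's columns, and Map 1 gets none. A side-by-side sketch of the two queries driving this:

    -- LEFT OUTER pushes the Top N Key filter to the preserved (left) input, src1:
    EXPLAIN SELECT src1.key, src2.value
    FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key)
    GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;
    -- RIGHT OUTER pushes it to the preserved (right) input, src2, as seen in Map 5 below:
    EXPLAIN SELECT src1.key, src2.value
    FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key)
    GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;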
Operator + sort order: ++ + keys: key (type: string), value (type: string) + top n: 5 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, 
src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit 
+ Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: 
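[Editorial note] topnkey.q evidently explains the FULL OUTER JOIN query twice. The second plan in this region has no Top N Key Operator after the join (Reducer 2 feeds the hash Group By directly), which presumably corresponds to a re-run with the optimization switched off; the governing SET statement falls outside the hunks shown here, so the flag usage in this sketch is an assumption:

    -- Editorial sketch; the actual SET statement is not visible in this diff.
    SET hive.optimize.topnkey=false;  -- assumed flag name and toggle
    EXPLAIN SELECT src1.key, src2.value
    FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key)
    GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5;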
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index 060281ac4e..2c5f2e2a68 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -133,23 +133,23 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2] + Top N Key Operator + sort order: + + keys: i (type: int) Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 2:int + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: 
true + projectedOutputColumnNums: [2] + Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50) Group By Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index b58de039f2..9eecba5f6c 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -93,24 +93,24 @@ STAGE PLANS: Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Select Operator - expressions: value (type: char(20)), UDFToInteger(key) (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 3] - selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int + Top N Key Operator + sort order: + + keys: value (type: char(20)) Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: char(20)) - Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 1:char(20) + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 1:char(20) + native: true + Select Operator + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int + Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -298,24 +298,24 @@ STAGE PLANS: Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Select Operator - expressions: value (type: char(20)), UDFToInteger(key) (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 3] - selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int + Top N Key Operator + sort order: - + keys: value (type: char(20)) Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: - - keys: _col0 (type: char(20)) - Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 1:char(20) + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 1:char(20) + native: true + Select Operator + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int + Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index 1f49804ca6..4eabff4bbb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -60,30 +60,30 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] + Top N Key Operator + sort order: +++ + keys: a (type: string), b (type: string) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string + native: true + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -120,7 +120,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -270,30 +270,30 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] + Top N Key Operator + sort order: +++ + keys: a (type: string), b (type: string) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string + native: true + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By 
Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -330,7 +330,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -480,30 +480,30 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] + Top N Key Operator + sort order: +++ + keys: a (type: string), b (type: string) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string + native: true + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -540,7 +540,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -690,28 +690,28 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string), b (type: string), c (type: string) - outputColumnNames: a, b, c - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2] + Top N Key Operator + sort order: ++++ + keys: a (type: string), b (type: string), c (type: string) Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++++ - keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) - Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string, col 1:string, col 2:string, 
ConstantVectorExpression(val 0) -> 4:bigint + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string, col 1:string, col 2:string + native: true + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint + keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] @@ -747,7 +747,7 @@ STAGE PLANS: includeColumns: [0, 1, 2] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -894,23 +894,23 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: a (type: string) - outputColumnNames: a - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: a (type: string) Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: a (type: string) - Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Select Operator + expressions: a (type: string) + outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -1084,31 +1084,31 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:string, 3:ROW__ID:struct] - Select Operator - expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6] - selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 6:double + Top N Key Operator + sort order: + + keys: (UDFToDouble(a) + UDFToDouble(b)) (type: double) Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: double) - Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 6:double + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 
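[Editorial note] Throughout vector_groupby_grouping_sets_limit.q.out, the new Top N Key keys drop the synthetic grouping-id constant (the 0L (type: bigint) / ConstantVectorExpression entry), so keyExpressions list only the real grouping columns and one scratch bigint column disappears from rowBatchContext; the downstream Group By still carries the constant. A hedged sketch of the query shape these plans suggest; the table DDL and exact grouping-set specification are not shown in this diff:

    -- Editorial sketch; table t(a, b, c) and the grouping-set spec are illustrative.
    EXPLAIN VECTORIZATION DETAIL
    SELECT a, b, count(*)
    FROM t
    GROUP BY a, b GROUPING SETS ((a, b), a, b, ())
    ORDER BY a, b
    LIMIT 10;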
5:double) -> 6:double + native: true + Select Operator + expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [7] + selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 7:double + Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 6:double + keyExpressions: col 7:double native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1145,7 +1145,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index bdcc286181..4c115014c0 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -258,23 +258,23 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - Select Operator - expressions: ss_ticket_number (type: int) - outputColumnNames: ss_ticket_number - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [9] + Top N Key Operator + sort order: + + keys: ss_ticket_number (type: int) Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: ss_ticket_number (type: int) - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 9:int + top n: 20 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:int + native: true + Select Operator + expressions: ss_ticket_number (type: int) + outputColumnNames: ss_ticket_number + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [9] + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index a4f32f16fa..de5bb5cf9a 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -343,29 +343,29 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Select Operator - expressions: concat(concat(concat('Quarter ', CAST( UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0D) + 1.0D)) AS STRING)), '-'), CAST( year(dt) AS STRING)) (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [20] - selectExpressions: 
StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 20:string + Top N Key Operator + sort order: + + keys: concat(concat(concat('Quarter ', CAST( UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0D) + 1.0D)) AS STRING)), '-'), CAST( year(dt) AS STRING)) (type: string) Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE - top n: 50 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 20:string + top n: 50 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 20:string + native: true + Select Operator + expressions: concat(concat(concat('Quarter ', CAST( UDFToInteger(((UDFToDouble((month(dt) - 1)) / 3.0D) + 1.0D)) AS STRING)), '-'), CAST( year(dt) AS STRING)) (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [21] + selectExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 21:string + Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 20:string + keyExpressions: col 
21:string native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out index 16803c9544..b27e72b8a8 100644 --- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -1,7 +1,7 @@ -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -26,37 +26,17 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) + top n: 5 + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string - native: true Group By Operator aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 4:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -65,12 +45,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) @@ -85,38 +59,17 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - 
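[Editorial note] vector_topnkey.q drops from `explain vectorization detail` to plain `explain vectorization`, so the long runs of deletions that follow (the Select Vectorization, Reduce Sink Vectorization, and rowBatchContext blocks) reflect lower explain verbosity rather than any change in the plan itself. Hive's EXPLAIN VECTORIZATION accepts increasing verbosity levels:

    -- Default level, as the .q file now uses (summary-level output):
    EXPLAIN VECTORIZATION
    SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;
    -- Most verbose level, as the .q file used before this patch:
    EXPLAIN VECTORIZATION DETAIL
    SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5;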
reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:string, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -124,12 +77,6 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) @@ -138,36 +85,19 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,10 +123,10 @@ POSTHOOK: Input: default@src 100 200 103 206 104 208 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -221,34 +151,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: key - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: key (type: string) + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string - native: true Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: key (type: string) mode: hash outputColumnNames: _col0 @@ -257,12 +169,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -276,36 +182,16 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY._col0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 @@ -313,12 +199,6 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -326,36 +206,19 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Select Vectorization: - 
className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -381,11 +244,11 @@ POSTHOOK: Input: default@src 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -400,8 +263,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -410,34 +274,17 @@ STAGE PLANS: alias: src1 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -450,46 +297,23 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: src2 
filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:string) predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -503,12 +327,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -520,51 +338,60 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By 
Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -577,16 +404,640 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: 
string) + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + 
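
The outer-join plans in this file differ only in where the optimizer re-creates the Top N Key operator. For the LEFT OUTER JOIN above it reappears on the left input's map stage (Map 1, keyed on key alone), for the RIGHT OUTER JOIN below it lands on the right input (Map 5, keyed on key, value), and the INNER and FULL OUTER variants keep it only after the join. A minimal sketch of that placement rule, under the assumption that only the preserved input of a one-sided outer join can be pre-filtered safely and that only a key prefix evaluable on that side is pushed; the names are illustrative, not Hive's API:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

/** Illustrative placement rule for cloning a Top N Key operator below a join; not Hive code. */
final class TopNKeyJoinPlacement {
  enum JoinType { INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER }

  /**
   * Longest prefix of the sort keys that the preserved join input can evaluate,
   * directly or through the equi-join condition (src1.key = src2.key makes the
   * two key columns interchangeable on matched rows). An empty prefix means the
   * operator stays above the join, as in the INNER and FULL OUTER plans here.
   */
  static <K> List<K> pushablePrefix(JoinType type, List<K> sortKeys,
                                    Predicate<K> preservedSideCanEvaluate) {
    List<K> prefix = new ArrayList<>();
    if (type != JoinType.LEFT_OUTER && type != JoinType.RIGHT_OUTER) {
      return prefix; // only a one-sided outer join has an input whose rows all survive
    }
    for (K key : sortKeys) {
      if (!preservedSideCanEvaluate.test(key)) {
        break; // later keys only refine the order, so a prefix filter is still safe
      }
      prefix.add(key);
    }
    return prefix;
  }
}

That matches the plans: the post-join keys are _col0, _col2 (src1.key, src2.value), of which the left side can evaluate only the src1.key prefix, while the right side reaches both through the join condition.
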
Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: 
COMPLETE + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) + top n: 5 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + 
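
Throughout these plans, a Top N Key Operator with top n: 5 is a best-effort filter rather than a limit: it tracks the smallest distinct key values seen so far and forwards only rows whose key still ranks within the current top n, so the group-by and reduce-sink behind it process less data while every row that can reach the final LIMIT 5 is guaranteed to pass. A minimal sketch of that filter, assuming a single ascending key; a hypothetical class, not Hive's implementation:

import java.util.TreeSet;

/** Best-effort top-n-key filter: forwards a superset of the rows that can rank in the top n. */
final class TopNKeyFilter<K extends Comparable<K>> {
  private final int n;
  private final TreeSet<K> smallestKeys = new TreeSet<>(); // distinct keys seen, ascending

  TopNKeyFilter(int n) { this.n = n; }

  /** Returns true if the row carrying this key should be forwarded downstream. */
  boolean accept(K key) {
    if (smallestKeys.contains(key)) {
      return true; // duplicates of a surviving key must pass, e.g. to feed aggregations
    }
    smallestKeys.add(key);
    if (smallestKeys.size() > n) {
      K evicted = smallestKeys.pollLast(); // evict the current largest key
      return !evicted.equals(key);         // reject only if the new key itself fell straight out
    }
    return true;
  }
}

Rows admitted early may later fall outside the boundary; that is harmless, because the downstream group-by, order-by and limit still run unchanged — the operator only thins their input.
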
Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) 
+ outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, 
src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 3d5bea143b..b38e446a07 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -483,23 +483,23 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] - Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: ctinyint (type: tinyint) Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: ctinyint (type: tinyint) - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:tinyint + top n: 20 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:tinyint + native: true + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator diff --git ql/src/test/results/clientpositive/perf/tez/query10.q.out ql/src/test/results/clientpositive/perf/tez/query10.q.out index 5b55d44d91..e8bb5e3472 100644 --- ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -156,10 +156,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Group By Operator [GBY_62] (rows=2090864244 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Top N Key Operator [TNK_103] (rows=2090864244 width=88) - keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100 - Select Operator [SEL_61] (rows=2090864244 width=88) - Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Select Operator 
[SEL_61] (rows=2090864244 width=88) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Top N Key Operator [TNK_103] (rows=2090864244 width=88) + keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100 Filter Operator [FIL_60] (rows=2090864244 width=88) predicate:(_col15 is not null or _col17 is not null) Merge Join Operator [MERGEJOIN_173] (rows=2090864244 width=88) diff --git ql/src/test/results/clientpositive/perf/tez/query15.q.out ql/src/test/results/clientpositive/perf/tez/query15.q.out index e1eca99d95..f3dd0e85c4 100644 --- ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -66,10 +66,10 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_24] (rows=348467716 width=135) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col4 - Top N Key Operator [TNK_44] (rows=348467716 width=135) - keys:_col4,sort order:+,top n:100 - Select Operator [SEL_23] (rows=348467716 width=135) - Output:["_col4","_col7"] + Select Operator [SEL_23] (rows=348467716 width=135) + Output:["_col4","_col7"] + Top N Key Operator [TNK_44] (rows=348467716 width=135) + keys:_col4,sort order:+,top n:100 Filter Operator [FIL_22] (rows=348467716 width=135) predicate:((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) Merge Join Operator [MERGEJOIN_77] (rows=348467716 width=135) diff --git ql/src/test/results/clientpositive/perf/tez/query17.q.out ql/src/test/results/clientpositive/perf/tez/query17.q.out index 6b3549216c..ff2c2b3100 100644 --- ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -128,10 +128,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_49] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_93] (rows=843315281 width=88) - keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_47] (rows=843315281 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Select Operator [SEL_47] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Top N Key Operator [TNK_93] (rows=843315281 width=88) + keys:_col9, _col10, _col25,sort order:+++,top n:100 Merge Join Operator [MERGEJOIN_213] (rows=843315281 width=88) Conds:RS_44._col3=RS_251._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] <-Map 20 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query27.q.out ql/src/test/results/clientpositive/perf/tez/query27.q.out index 20da0af5cc..2cde7f8f47 100644 --- ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -78,10 +78,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_29] (rows=2529945843 width=88) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_56] (rows=843315281 width=88) - keys:_col0, _col1, 0L,sort order:+++,top n:100 - Select Operator [SEL_27] (rows=843315281 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Select Operator [SEL_27] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Top N Key Operator [TNK_56] (rows=843315281 width=88) + keys:_col17, _col15,sort order:+++,top n:100 Merge Join Operator [MERGEJOIN_100] (rows=843315281 width=88) Conds:RS_24._col1=RS_127._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"] <-Map 14 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query35.q.out ql/src/test/results/clientpositive/perf/tez/query35.q.out index 4ad92c2fed..8a17c9d322 100644 --- ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -152,10 +152,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_62] (rows=2090864244 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_104] (rows=2090864244 width=88) - keys:_col4, _col6, _col7, _col8, _col9, _col10,sort order:++++++,top n:100 - Select Operator [SEL_61] (rows=2090864244 width=88) - Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Select Operator [SEL_61] (rows=2090864244 width=88) + Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Top N Key Operator [TNK_104] (rows=2090864244 width=88) + keys:_col4, _col6, _col7, _col8, _col9, _col10,sort order:++++++,top n:100 Filter Operator [FIL_60] (rows=2090864244 width=88) predicate:(_col12 is not null or _col14 is not null) Merge Join Operator [MERGEJOIN_174] (rows=2090864244 width=88) diff --git ql/src/test/results/clientpositive/perf/tez/query40.q.out ql/src/test/results/clientpositive/perf/tez/query40.q.out index a3b6c03801..619684a46f 100644 --- ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -85,10 +85,10 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_29] (rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Top N Key Operator [TNK_55] (rows=421645953 width=135) - keys:_col0, _col1,sort order:++,top n:100 - Select Operator [SEL_27] (rows=421645953 width=135) - Output:["_col0","_col1","_col2","_col3"] + Select Operator [SEL_27] (rows=421645953 width=135) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_55] (rows=421645953 width=135) + keys:_col14, _col11,sort order:++,top n:100 Merge Join Operator [MERGEJOIN_101] (rows=421645953 width=135) Conds:RS_24._col1=RS_120._col0(Inner),Output:["_col4","_col7","_col9","_col11","_col14"] <-Map 13 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query43.q.out ql/src/test/results/clientpositive/perf/tez/query43.q.out index afa3363caa..6a28b7e0b5 100644 --- 
ql/src/test/results/clientpositive/perf/tez/query43.q.out +++ ql/src/test/results/clientpositive/perf/tez/query43.q.out @@ -64,10 +64,10 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_17] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Top N Key Operator [TNK_33] (rows=696954748 width=88) - keys:_col0, _col1,sort order:++,top n:100 - Select Operator [SEL_15] (rows=696954748 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Select Operator [SEL_15] (rows=696954748 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Top N Key Operator [TNK_33] (rows=696954748 width=88) + keys:_col8, _col7,sort order:++,top n:100 Merge Join Operator [MERGEJOIN_55] (rows=696954748 width=88) Conds:RS_12._col1=RS_66._col0(Inner),Output:["_col2","_col5","_col7","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query45.q.out ql/src/test/results/clientpositive/perf/tez/query45.q.out index 6458811aa6..5edc7f5615 100644 --- ql/src/test/results/clientpositive/perf/tez/query45.q.out +++ ql/src/test/results/clientpositive/perf/tez/query45.q.out @@ -73,10 +73,10 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_52] (rows=191667562 width=152) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7 - Top N Key Operator [TNK_82] (rows=191667562 width=152) - keys:_col8, _col7,sort order:++,top n:100 - Select Operator [SEL_51] (rows=191667562 width=152) - Output:["_col3","_col7","_col8"] + Select Operator [SEL_51] (rows=191667562 width=152) + Output:["_col3","_col7","_col8"] + Top N Key Operator [TNK_82] (rows=191667562 width=152) + keys:_col8, _col7,sort order:++,top n:100 Filter Operator [FIL_50] (rows=191667562 width=152) predicate:((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or CASE WHEN ((_col14 = 0L)) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col13 is null) THEN (null) WHEN ((_col15 < _col14)) THEN (null) ELSE (false) END) Select Operator [SEL_49] (rows=191667562 width=152) diff --git ql/src/test/results/clientpositive/perf/tez/query50.q.out ql/src/test/results/clientpositive/perf/tez/query50.q.out index efbae5cbf3..24944b739a 100644 --- ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -149,10 +149,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_29] (rows=766650239 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Top N Key Operator [TNK_56] (rows=766650239 width=88) - keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,sort order:++++++++++,top n:100 - Select Operator [SEL_27] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Select Operator [SEL_27] (rows=766650239 width=88) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Top N Key Operator [TNK_56] (rows=766650239 width=88) + keys:_col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23,sort order:++++++++++,top n:100 Merge Join Operator [MERGEJOIN_120] (rows=766650239 width=88) Conds:RS_24._col10=RS_143._col0(Inner),Output:["_col0","_col7","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 15 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query66.q.out ql/src/test/results/clientpositive/perf/tez/query66.q.out index 432dd7e710..363ccdf0a0 100644 --- ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -488,10 +488,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_293] (rows=316240137 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_292] (rows=316240137 width=135) - keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 - Select Operator [SEL_291] (rows=316240137 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Select Operator [SEL_292] (rows=316240137 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Top N Key Operator [TNK_291] (rows=316240137 width=135) + keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 Group By Operator [GBY_290] (rows=210822976 width=135) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 15 [SIMPLE_EDGE] @@ -612,10 +612,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_271] (rows=316240137 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_270] (rows=316240137 width=135) - keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 - Select Operator [SEL_269] (rows=316240137 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Select Operator [SEL_270] (rows=316240137 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Top N Key Operator [TNK_269] (rows=316240137 width=135) + keys:_col0, _col1, _col2, _col3, _col4, _col5,sort order:++++++,top n:100 Group By Operator [GBY_268] (rows=105417161 width=135) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 5 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/tez/query69.q.out ql/src/test/results/clientpositive/perf/tez/query69.q.out index 4aad667ba3..f7de83e6be 100644 --- ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -133,10 +133,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_66] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_105] (rows=383325119 width=88) - keys:_col6, _col7, _col8, _col9, _col10,sort order:+++++,top n:100 - Select Operator [SEL_65] (rows=383325119 width=88) - Output:["_col6","_col7","_col8","_col9","_col10"] + Select Operator [SEL_65] (rows=383325119 width=88) + Output:["_col6","_col7","_col8","_col9","_col10"] + Top N Key Operator [TNK_105] (rows=383325119 width=88) + keys:_col6, _col7, _col8, _col9, _col10,sort order:+++++,top n:100 Filter Operator [FIL_64] (rows=383325119 width=88) predicate:_col14 is null Merge Join Operator [MERGEJOIN_181] (rows=766650239 width=88) diff --git ql/src/test/results/clientpositive/perf/tez/query99.q.out ql/src/test/results/clientpositive/perf/tez/query99.q.out index 456fd8c96f..ec82189be1 100644 --- ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -102,10 +102,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_29] (rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_57] (rows=421645953 width=135) - keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_27] (rows=421645953 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Select Operator [SEL_27] (rows=421645953 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Top N Key Operator [TNK_57] (rows=421645953 width=135) + keys:substr(_col10, 1, 20), _col12, _col8,sort order:+++,top n:100 Merge Join Operator [MERGEJOIN_101] (rows=421645953 width=135) Conds:RS_24._col3=RS_128._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] <-Map 14 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/tez/topnkey.q.out ql/src/test/results/clientpositive/tez/topnkey.q.out index 66b9191a48..f42faefc7a 100644 --- ql/src/test/results/clientpositive/tez/topnkey.q.out +++ ql/src/test/results/clientpositive/tez/topnkey.q.out @@ -29,10 +29,10 @@ Stage-0 
PartitionCols:_col0 Group By Operator [GBY_3] (rows=250 width=95) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_11] (rows=500 width=178) - keys:_col0,sort order:+,top n:5 - Select Operator [SEL_1] (rows=500 width=178) - Output:["_col0","_col1"] + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_11] (rows=500 width=178) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -80,10 +80,10 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_2] (rows=250 width=87) Output:["_col0"],keys:key - Top N Key Operator [TNK_10] (rows=500 width=87) - keys:key,sort order:+,top n:5 - Select Operator [SEL_1] (rows=500 width=87) - Output:["key"] + Select Operator [SEL_1] (rows=500 width=87) + Output:["key"] + Top N Key Operator [TNK_10] (rows=500 width=87) + keys:key,sort order:+,top n:5 TableScan [TS_0] (rows=500 width=87) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] @@ -100,63 +100,327 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 100 103 104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY Plan optimized by CBO. 
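
The perf-query hunks above (query10 through query99) and the two hunks just shown move the Top N Key operator below the adjacent Select and, where the Select renames or computes its outputs, translate the keys accordingly: _col0, _col1, _col2 becomes _col9, _col10, _col25 in query17, keys:_col0 becomes keys:key here, and query99 ends up with a computed key, substr(_col10, 1, 20). A toy version of that key translation, assuming a per-operator map from output column to source expression; illustrative types, not Hive's:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/** Toy translation of top-n keys through a Select's output-column -> source-expression map. */
final class TopNKeySelectRemap {
  record KeyExpr(String name, boolean constant) {}

  /**
   * Constant keys (query27's 0L) are simply dropped: they never influence the
   * sort order. A non-constant key with no source expression blocks the move,
   * signalled here by an empty result.
   */
  static List<String> remapKeys(List<KeyExpr> keys, Map<String, String> columnExprMap) {
    List<String> mapped = new ArrayList<>(keys.size());
    for (KeyExpr key : keys) {
      if (key.constant()) {
        continue;
      }
      String source = columnExprMap.get(key.name());
      if (source == null) {
        return List.of(); // cannot push below this Select; leave the operator where it is
      }
      mapped.add(source);
    }
    return mapped;
  }
}

For query17 the map would carry _col0 -> _col9, _col1 -> _col10 and _col2 -> _col25, reproducing the rewritten keys in the hunk above.
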
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)

 Stage-0
   Fetch Operator
     limit:5
     Stage-1
-      Reducer 3
-      File Output Operator [FS_13]
-        Limit [LIM_12] (rows=5 width=178)
+      Reducer 4
+      File Output Operator [FS_17]
+        Limit [LIM_16] (rows=5 width=178)
           Number of rows:5
-          Select Operator [SEL_11] (rows=791 width=178)
+          Select Operator [SEL_15] (rows=395 width=178)
             Output:["_col0","_col1"]
-          <-Reducer 2 [SIMPLE_EDGE]
-            SHUFFLE [RS_10]
-              Select Operator [SEL_9] (rows=791 width=178)
-                Output:["_col0","_col1"]
-                Merge Join Operator [MERGEJOIN_28] (rows=791 width=178)
-                  Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
-                <-Map 1 [SIMPLE_EDGE]
-                  SHUFFLE [RS_6]
-                    PartitionCols:_col0
-                    Select Operator [SEL_2] (rows=500 width=87)
-                      Output:["_col0"]
-                      Filter Operator [FIL_16] (rows=500 width=87)
-                        predicate:key is not null
-                        TableScan [TS_0] (rows=500 width=87)
-                          default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-                <-Map 4 [SIMPLE_EDGE]
-                  SHUFFLE [RS_7]
-                    PartitionCols:_col0
-                    Select Operator [SEL_5] (rows=500 width=178)
-                      Output:["_col0","_col1"]
-                      Filter Operator [FIL_17] (rows=500 width=178)
-                        predicate:key is not null
-                        TableScan [TS_3] (rows=500 width=178)
-                          default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+          <-Reducer 3 [SIMPLE_EDGE]
+            SHUFFLE [RS_14]
+              Group By Operator [GBY_12] (rows=395 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_11]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_10] (rows=395 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Top N Key Operator [TNK_22] (rows=791 width=178)
+                      keys:_col0, _col2,sort order:++,top n:5
+                      Merge Join Operator [MERGEJOIN_33] (rows=791 width=178)
+                        Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
+                      <-Map 1 [SIMPLE_EDGE]
+                        SHUFFLE [RS_6]
+                          PartitionCols:_col0
+                          Select Operator [SEL_2] (rows=500 width=87)
+                            Output:["_col0"]
+                            Filter Operator [FIL_20] (rows=500 width=87)
+                              predicate:key is not null
+                              TableScan [TS_0] (rows=500 width=87)
+                                default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                      <-Map 5 [SIMPLE_EDGE]
+                        SHUFFLE [RS_7]
+                          PartitionCols:_col0
+                          Select Operator [SEL_5] (rows=500 width=178)
+                            Output:["_col0","_col1"]
+                            Filter Operator [FIL_21] (rows=500 width=178)
+                              predicate:key is not null
+                              TableScan [TS_3] (rows=500 width=178)
+                                default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]

-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_15]
+        Limit [LIM_14] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_13] (rows=395 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE]
+            SHUFFLE [RS_12]
+              Group By Operator [GBY_10] (rows=395 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=395 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Merge Join Operator [MERGEJOIN_24] (rows=791 width=178)
+                      Conds:RS_4._col0=RS_5._col0(Right Outer),Output:["_col0","_col2"]
+                    <-Map 1 [SIMPLE_EDGE]
+                      SHUFFLE [RS_4]
+                        PartitionCols:_col0
+                        Select Operator [SEL_1] (rows=500 width=87)
+                          Output:["_col0"]
+                          TableScan [TS_0] (rows=500 width=87)
+                            default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                    <-Map 5 [SIMPLE_EDGE]
+                      SHUFFLE [RS_5]
+                        PartitionCols:_col0
+                        Select Operator [SEL_3] (rows=500 width=178)
+                          Output:["_col0","_col1"]
+                          Top N Key Operator [TNK_25]
+                            keys:key, value,sort order:++,top n:5
+                            TableScan [TS_2] (rows=500 width=178)
+                              default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_15]
+        Limit [LIM_14] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_13] (rows=500 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE]
+            SHUFFLE [RS_12]
+              Group By Operator [GBY_10] (rows=500 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=500 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Top N Key Operator [TNK_16] (rows=1000 width=178)
+                      keys:_col0, _col2,sort order:++,top n:5
+                      Merge Join Operator [MERGEJOIN_17] (rows=1000 width=178)
+                        Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col0","_col2"]
+                      <-Map 1 [SIMPLE_EDGE]
+                        SHUFFLE [RS_4]
+                          PartitionCols:_col0
+                          Select Operator [SEL_1] (rows=500 width=87)
+                            Output:["_col0"]
+                            TableScan [TS_0] (rows=500 width=87)
+                              default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                      <-Map 5 [SIMPLE_EDGE]
+                        SHUFFLE [RS_5]
+                          PartitionCols:_col0
+                          Select Operator [SEL_3] (rows=500 width=178)
+                            Output:["_col0","_col1"]
+                            TableScan [TS_2] (rows=500 width=178)
+                              default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4
+      File Output Operator [FS_15]
+        Limit [LIM_14] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_13] (rows=500 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE]
+            SHUFFLE [RS_12]
+              Group By Operator [GBY_10] (rows=500 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=500 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Merge Join Operator [MERGEJOIN_16] (rows=1000 width=178)
+                      Conds:RS_4._col0=RS_5._col0(Outer),Output:["_col0","_col2"]
+                    <-Map 1 [SIMPLE_EDGE]
+                      SHUFFLE [RS_4]
+                        PartitionCols:_col0
+                        Select Operator [SEL_1] (rows=500 width=87)
+                          Output:["_col0"]
+                          TableScan [TS_0] (rows=500 width=87)
+                            default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                    <-Map 5 [SIMPLE_EDGE]
+                      SHUFFLE [RS_5]
+                        PartitionCols:_col0
+                        Select Operator [SEL_3] (rows=500 width=178)
+                          Output:["_col0","_col1"]
+                          TableScan [TS_2] (rows=500 width=178)
+                            default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
diff --git ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
index d6f7cc2940..1b4b013f66 100644
--- ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
+++ ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
@@ -1,7 +1,7 @@
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -29,10 +29,10 @@ Stage-0
               PartitionCols:_col0
               Group By Operator [GBY_14] (rows=250 width=95)
                 Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
-                Top N Key Operator [TNK_13] (rows=500 width=178)
-                  keys:_col0,sort order:+,top n:5
-                  Select Operator [SEL_12] (rows=500 width=178)
-                    Output:["_col0","_col1"]
+                Select Operator [SEL_13] (rows=500 width=178)
+                  Output:["_col0","_col1"]
+                  Top N Key Operator [TNK_12] (rows=500 width=178)
+                    keys:key,sort order:+,top n:5
                     TableScan [TS_0] (rows=500 width=178)
                       default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
@@ -49,10 +49,10 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 100 200
 103 206
 104 208
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -80,10 +80,10 @@ Stage-0
               PartitionCols:_col0
               Group By Operator [GBY_13] (rows=250 width=87)
                 Output:["_col0"],keys:key
-                Top N Key Operator [TNK_12] (rows=500 width=87)
-                  keys:key,sort order:+,top n:5
-                  Select Operator [SEL_11] (rows=500 width=87)
-                    Output:["key"]
+                Select Operator [SEL_12] (rows=500 width=87)
+                  Output:["key"]
+                  Top N Key Operator [TNK_11] (rows=500 width=87)
+                    keys:key,sort order:+,top n:5
                     TableScan [TS_0] (rows=500 width=87)
                       default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
@@ -100,63 +100,327 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 100
 103
 104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)

 Stage-0
   Fetch Operator
     limit:5
     Stage-1
-      Reducer 3 vectorized
-      File Output Operator [FS_37]
-        Limit [LIM_36] (rows=5 width=178)
+      Reducer 4 vectorized
+      File Output Operator [FS_44]
+        Limit [LIM_43] (rows=5 width=178)
           Number of rows:5
-          Select Operator [SEL_35] (rows=791 width=178)
+          Select Operator [SEL_42] (rows=395 width=178)
             Output:["_col0","_col1"]
-          <-Reducer 2 [SIMPLE_EDGE]
-            SHUFFLE [RS_10]
-              Select Operator [SEL_9] (rows=791 width=178)
-                Output:["_col0","_col1"]
-                Merge Join Operator [MERGEJOIN_28] (rows=791 width=178)
-                  Conds:RS_31._col0=RS_34._col0(Inner),Output:["_col0","_col2"]
-                <-Map 1 [SIMPLE_EDGE] vectorized
-                  SHUFFLE [RS_31]
-                    PartitionCols:_col0
-                    Select Operator [SEL_30] (rows=500 width=87)
-                      Output:["_col0"]
-                      Filter Operator [FIL_29] (rows=500 width=87)
-                        predicate:key is not null
-                        TableScan [TS_0] (rows=500 width=87)
-                          default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-                <-Map 4 [SIMPLE_EDGE] vectorized
-                  SHUFFLE [RS_34]
-                    PartitionCols:_col0
-                    Select Operator [SEL_33] (rows=500 width=178)
-                      Output:["_col0","_col1"]
-                      Filter Operator [FIL_32] (rows=500 width=178)
-                        predicate:key is not null
-                        TableScan [TS_3] (rows=500 width=178)
-                          default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+          <-Reducer 3 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_41]
+              Group By Operator [GBY_40] (rows=395 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_11]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_10] (rows=395 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Top N Key Operator [TNK_22] (rows=791 width=178)
+                      keys:_col0, _col2,sort order:++,top n:5
+                      Merge Join Operator [MERGEJOIN_33] (rows=791 width=178)
+                        Conds:RS_36._col0=RS_39._col0(Inner),Output:["_col0","_col2"]
+                      <-Map 1 [SIMPLE_EDGE] vectorized
+                        SHUFFLE [RS_36]
+                          PartitionCols:_col0
+                          Select Operator [SEL_35] (rows=500 width=87)
+                            Output:["_col0"]
+                            Filter Operator [FIL_34] (rows=500 width=87)
+                              predicate:key is not null
+                              TableScan [TS_0] (rows=500 width=87)
+                                default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                      <-Map 5 [SIMPLE_EDGE] vectorized
+                        SHUFFLE [RS_39]
+                          PartitionCols:_col0
+                          Select Operator [SEL_38] (rows=500 width=178)
+                            Output:["_col0","_col1"]
+                            Filter Operator [FIL_37] (rows=500 width=178)
+                              predicate:key is not null
+                              TableScan [TS_3] (rows=500 width=178)
+                                default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]

-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4 vectorized
+      File Output Operator [FS_35]
+        Limit [LIM_34] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_33] (rows=395 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_32]
+              Group By Operator [GBY_31] (rows=395 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=395 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Top N Key Operator [TNK_18] (rows=791 width=178)
+                      keys:_col0, _col2,sort order:++,top n:5
+                      Merge Join Operator [MERGEJOIN_24] (rows=791 width=178)
+                        Conds:RS_28._col0=RS_30._col0(Left Outer),Output:["_col0","_col2"]
+                      <-Map 1 [SIMPLE_EDGE] vectorized
+                        SHUFFLE [RS_28]
+                          PartitionCols:_col0
+                          Select Operator [SEL_27] (rows=500 width=87)
+                            Output:["_col0"]
+                            Top N Key Operator [TNK_26]
+                              keys:key,sort order:+,top n:5
+                              TableScan [TS_0] (rows=500 width=87)
+                                default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                      <-Map 5 [SIMPLE_EDGE] vectorized
+                        SHUFFLE [RS_30]
+                          PartitionCols:_col0
+                          Select Operator [SEL_29] (rows=500 width=178)
+                            Output:["_col0","_col1"]
+                            TableScan [TS_2] (rows=500 width=178)
+                              default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4 vectorized
+      File Output Operator [FS_35]
+        Limit [LIM_34] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_33] (rows=395 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_32]
+              Group By Operator [GBY_31] (rows=395 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=395 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Merge Join Operator [MERGEJOIN_24] (rows=791 width=178)
+                      Conds:RS_27._col0=RS_30._col0(Right Outer),Output:["_col0","_col2"]
+                    <-Map 1 [SIMPLE_EDGE] vectorized
+                      SHUFFLE [RS_27]
+                        PartitionCols:_col0
+                        Select Operator [SEL_26] (rows=500 width=87)
+                          Output:["_col0"]
+                          TableScan [TS_0] (rows=500 width=87)
+                            default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                    <-Map 5 [SIMPLE_EDGE] vectorized
+                      SHUFFLE [RS_30]
+                        PartitionCols:_col0
+                        Select Operator [SEL_29] (rows=500 width=178)
+                          Output:["_col0","_col1"]
+                          Top N Key Operator [TNK_28]
+                            keys:key, value,sort order:++,top n:5
+                            TableScan [TS_2] (rows=500 width=178)
+                              default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4 vectorized
+      File Output Operator [FS_26]
+        Limit [LIM_25] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_24] (rows=500 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_23]
+              Group By Operator [GBY_22] (rows=500 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=500 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Top N Key Operator [TNK_16] (rows=1000 width=178)
+                      keys:_col0, _col2,sort order:++,top n:5
+                      Merge Join Operator [MERGEJOIN_17] (rows=1000 width=178)
+                        Conds:RS_19._col0=RS_21._col0(Outer),Output:["_col0","_col2"]
+                      <-Map 1 [SIMPLE_EDGE] vectorized
+                        SHUFFLE [RS_19]
+                          PartitionCols:_col0
+                          Select Operator [SEL_18] (rows=500 width=87)
+                            Output:["_col0"]
+                            TableScan [TS_0] (rows=500 width=87)
+                              default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                      <-Map 5 [SIMPLE_EDGE] vectorized
+                        SHUFFLE [RS_21]
+                          PartitionCols:_col0
+                          Select Operator [SEL_20] (rows=500 width=178)
+                            Output:["_col0","_col1"]
+                            TableScan [TS_2] (rows=500 width=178)
+                              default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:5
+    Stage-1
+      Reducer 4 vectorized
+      File Output Operator [FS_25]
+        Limit [LIM_24] (rows=5 width=178)
+          Number of rows:5
+          Select Operator [SEL_23] (rows=500 width=178)
+            Output:["_col0","_col1"]
+          <-Reducer 3 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_22]
+              Group By Operator [GBY_21] (rows=500 width=178)
+                Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+              <-Reducer 2 [SIMPLE_EDGE]
+                SHUFFLE [RS_9]
+                  PartitionCols:_col0, _col1
+                  Group By Operator [GBY_8] (rows=500 width=178)
+                    Output:["_col0","_col1"],keys:_col0, _col2
+                    Merge Join Operator [MERGEJOIN_16] (rows=1000 width=178)
+                      Conds:RS_18._col0=RS_20._col0(Outer),Output:["_col0","_col2"]
+                    <-Map 1 [SIMPLE_EDGE] vectorized
+                      SHUFFLE [RS_18]
+                        PartitionCols:_col0
+                        Select Operator [SEL_17] (rows=500 width=87)
+                          Output:["_col0"]
+                          TableScan [TS_0] (rows=500 width=87)
+                            default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                    <-Map 5 [SIMPLE_EDGE] vectorized
+                      SHUFFLE [RS_20]
+                        PartitionCols:_col0
+                        Select Operator [SEL_19] (rows=500 width=178)
+                          Output:["_col0","_col1"]
+                          TableScan [TS_2] (rows=500 width=178)
+                            default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
diff --git ql/src/test/results/clientpositive/topnkey.q.out ql/src/test/results/clientpositive/topnkey.q.out
index 31f3a70920..30bb097949 100644
--- ql/src/test/results/clientpositive/topnkey.q.out
+++ ql/src/test/results/clientpositive/topnkey.q.out
@@ -182,20 +182,17 @@ POSTHOOK: Input: default@src
 100
 103
 104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-1
@@ -243,10 +240,11 @@ STAGE PLANS:
             1 _col0 (type: string)
           outputColumnNames: _col0, _col2
           Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: _col0 (type: string), _col2 (type: string)
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
               table:
@@ -255,20 +253,43 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
-              Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
           Limit
             Number of rows: 5
             Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
@@ -286,16 +307,536 @@ STAGE PLANS:
     Processor Tree:
       ListSink

-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
diff --git ql/src/test/results/clientpositive/vector_topnkey.q.out ql/src/test/results/clientpositive/vector_topnkey.q.out
index ed829e2e7c..0280cb9ceb 100644
--- ql/src/test/results/clientpositive/vector_topnkey.q.out
+++ ql/src/test/results/clientpositive/vector_topnkey.q.out
@@ -1,7 +1,7 @@
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization
 SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -20,28 +20,12 @@ STAGE PLANS:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
             Select Operator
               expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
              outputColumnNames: _col0, _col1
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0, 4]
-                  selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
                aggregations: sum(_col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 4:int) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 0:string
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0]
                keys: _col0 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
@@ -50,11 +34,6 @@ STAGE PLANS:
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
               TopN Hash Memory Usage: 0.1
               value expressions: _col1 (type: bigint)
@@ -68,12 +47,6 @@ STAGE PLANS:
       allNative: false
       usesVectorUDFAdaptor: false
       vectorized: true
-      rowBatchContext:
-          dataColumnCount: 2
-          includeColumns: [0, 1]
-          dataColumns: key:string, value:string
-          partitionColumnCount: 0
-          scratchColumnTypeNames: [string, bigint]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -96,17 +69,9 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint]
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
              Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
              TopN Hash Memory Usage: 0.1
              value expressions: _col1 (type: bigint)
@@ -120,12 +85,6 @@ STAGE PLANS:
       allNative: false
       usesVectorUDFAdaptor: false
       vectorized: true
-      rowBatchContext:
-          dataColumnCount: 2
-          includeColumns: [0, 1]
-          dataColumns: _col0:string, _col1:bigint
-          partitionColumnCount: 0
-          scratchColumnTypeNames: []
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -165,10 +124,10 @@ POSTHOOK: Input: default@src
 100 200
 103 206
 104 208
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization
 SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -187,25 +146,11 @@ STAGE PLANS:
          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
            Select Operator
              expressions: key (type: string)
              outputColumnNames: key
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0]
              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
              Group By Operator
-                Group By Vectorization:
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 0:string
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: []
                keys: key (type: string)
                mode: hash
                outputColumnNames: _col0
@@ -214,11 +159,6 @@ STAGE PLANS:
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
               TopN Hash Memory Usage: 0.1
      Execution mode: vectorized
@@ -231,12 +171,6 @@ STAGE PLANS:
       allNative: false
       usesVectorUDFAdaptor: false
       vectorized: true
-      rowBatchContext:
-          dataColumnCount: 2
-          includeColumns: [0]
-          dataColumns: key:string, value:string
-          partitionColumnCount: 0
-          scratchColumnTypeNames: []
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -258,17 +192,9 @@ STAGE PLANS:
     Map Reduce
      Map Operator Tree:
          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:string]
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
              Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
              TopN Hash Memory Usage: 0.1
      Execution mode: vectorized
@@ -281,12 +207,6 @@ STAGE PLANS:
       allNative: false
       usesVectorUDFAdaptor: false
       vectorized: true
-      rowBatchContext:
-          dataColumnCount: 1
-          includeColumns: [0]
-          dataColumns: _col0:string
-          partitionColumnCount: 0
-          scratchColumnTypeNames: []
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -326,11 +246,11 @@ POSTHOOK: Input: default@src
 100
 103
 104
-PREHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -339,7 +259,8 @@ PLAN VECTORIZATION:
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-1
@@ -394,10 +315,11 @@ STAGE PLANS:
             1 _col0 (type: string)
           outputColumnNames: _col0, _col2
           Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-          Select Operator
-            expressions: _col0 (type: string), _col2 (type: string)
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
               table:
@@ -409,18 +331,47 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:_col0:string, 1:_col1:string]
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
              TopN Hash Memory Usage: 0.1
              value expressions: _col1 (type: string)
      Execution mode: vectorized
@@ -433,12 +384,6 @@ STAGE PLANS:
       allNative: false
       usesVectorUDFAdaptor: false
       vectorized: true
-      rowBatchContext:
-          dataColumnCount: 2
-          includeColumns: [0, 1]
-          dataColumns: _col0:string, _col1:string
-          partitionColumnCount: 0
-          scratchColumnTypeNames: []
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -447,7 +392,7 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
           Limit
             Number of rows: 5
             Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
@@ -465,16 +410,655 @@ STAGE PLANS:
     Processor Tree:
       ListSink

-PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+PREHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization
+SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: string)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2
+          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            keys: _col0 (type: string), _col2 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE
+              TopN Hash Memory Usage: 0.1
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + 
File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + 
+ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + 
condition map: + Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 FULL OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 -0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104
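The rewritten plans above all share one shape: the map-side Reduce Output Operator carries a top-n hint (the "TopN Hash Memory Usage: 0.1" lines), so each mapper may drop rows whose key is provably outside the final LIMIT before shuffling them, while the reducer-side Limit remains authoritative. The following is a minimal standalone sketch of that filtering idea, assuming ascending string keys as in these plans; the class and method names are illustrative only, not Hive's TopNKeyOperator or its API.

import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;

// Illustrative sketch, not Hive code: keeps the n smallest keys seen so far
// and forwards a row only if its key may still be among the final top n.
public class TopNKeyFilterSketch {

  private final int n;
  private final TreeSet<String> topKeys = new TreeSet<>();

  public TopNKeyFilterSketch(int n) {
    this.n = n;
  }

  // Returns true if a row with this key may still reach the final top n.
  public boolean canForward(String key) {
    if (topKeys.size() < n) {
      topKeys.add(key);
      return true;
    }
    // The largest retained key is the current cut-off for ascending order.
    if (key.compareTo(topKeys.last()) > 0) {
      return false;          // provably outside the top n: drop the row early
    }
    topKeys.add(key);
    topKeys.pollLast();      // keep the retained key set bounded at n entries
    return true;
  }

  public static void main(String[] args) {
    TopNKeyFilterSketch filter = new TopNKeyFilterSketch(2);
    List<String> forwarded = new ArrayList<>();
    for (String key : new String[] {"104", "0", "103", "10", "100"}) {
      if (filter.canForward(key)) {
        forwarded.add(key);
      }
    }
    // Prints [104, 0, 103, 10]: a superset of the true top 2 ("0", "10");
    // "100" is dropped once enough smaller keys have been observed.
    System.out.println(forwarded);
  }
}

Because the filter only ever discards rows that cannot appear in the final top n, applying it on each mapper changes the volume of data shuffled but not the query result, which is consistent with the unchanged SELECT outputs recorded after each rewritten plan above.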